################################################################################ # # Example "peg_ini.pl". # ################################################################################ use strict; use warnings; my $Is_Win32 = $^O eq 'MSWin32'; # Declare global vars set by peg. our (@Argv_end, $Bin_dir, $Code_on_match, $Code_on_match2, %Env, @Exclude_dirs, @Exclude_exts, @Exclude_files, @FileFind_opts, $HOME_dir, @Ini_files, $Newline, %Peg_longopt, %Peg_p, %Peg_Q, %Peg_z, %Peg_zz, @Perlexpr_mung, $Skip_dot_files, $Verbose, $Start_dir, $Consider_ctime); sub Warn { my $msg = join '', @_; $msg =~ s/\015?\012\z//; # chomp_ print STDERR "peg_ini: $msg\n"; } # Warn sub Die { Warn @_; exit(2); } # Die ################################################################################ # Edit these as desired. #push @Exclude_dirs, qw( # .git # blib #); # #push @Exclude_exts, qw( # obj #); ################################################################################ $Env{PEG_COLOR} = 'f=lg,c=ly,l=lc,b=lm,n=lw,m=lr,z=wob,y=lyor,k=lc'; $Env{PEG_OPTIONS} = "-IIIJJssT#+_\\"; # XXX Experimental. $Env{PEG_FS_LAYER} = ':utf8' if !$Is_Win32; ################################################################################ $Peg_p{c} = 'c:cpp:h:hpp:tcc:xs:y'; $Peg_p{h} = 'h:hpp'; $Peg_p{p} = 'pl:pm:pod:t'; $Peg_p{htm} = 'htm:html'; $Peg_p{jpg} = 'jpeg:jpg'; ################################################################################ sub save_cxt { $::Saved_Context_line = $::Context_line; $::Saved_Context_lineno = $::Context_lineno; } sub restore_cxt { $::Context_line = $::Saved_Context_line; $::Context_lineno = $::Saved_Context_lineno; } $Peg_z{c} = <<'EOT'; # PEG_FAST_Z_CONTEXT # PEG_Z_PRIMARY_COLOR ( # A multi line #define. Only valid while lines are \'d. (/^\#\s*define\s+\w+.*\\$/ and save_cxt(), $::Multi_line_define = 1) # context or (($::Multi_line_define and (/\\$/ ? undef # still in mld : ($::Multi_line_define == 2 ? (restore_cxt(), $::Multi_line_define = undef) # beyond mld : ($::Multi_line_define = 2))) # last line of mld ) and 0) # not context ) or ( # Functions. /^\w[\w\s\*\&:~]*\(/ # (1) looks like a function and not /^(?:if|for|switch|while)\b/ # (2) and isn't a statement and ( $::L = $_, $::L =~ s/\/\*.*?\*\/|\/[\*\/].*//g, # remove comments $::L !~ /[!^%;\"]/ # (3) and isn't a expression/statement ) ) or # An unnamed "typedef struct". (/^typedef\s+struct\s*(?:\{[^\}]*)?$/ and do {{ # Read forward to find the struct name! # Do the entire file in one pass. unless ($::Last_file eq $File) { $::Last_file = $File; %::Typedef_struct = (); my $start_pos = tell(F); my $start_line = $.; my $typedef_struct_line = $.; my $inside = 1; while () { if ($inside) { if (/^\}\s+(\w+)/) { $::Typedef_struct{$typedef_struct_line} = $1; $inside = undef; } } elsif (/^typedef\s+struct\s*(?:\{[^\}]*)?$/) { $typedef_struct_line = $.; $inside = 1; } } # Restore IO position. $. = $start_line; seek F, $start_pos, 0 or die "PEG_Z_C: cannot seek back in $File: $!\n"; } my $found; if (exists $::Typedef_struct{$.}) { $_ = "typedef struct " . $::Typedef_struct{$.} . " {" . $Newline; $found = 1; } $found; }}) or (/^(?:typedef\s+struct|struct|template)\s+\w+/ and not /[,;\)]/) or (/^class\s+\w+\s*$/) or ( # Clear the context if outside function/typedef scope. ($prev_line and $prev_line =~ /^\}/ and $Context_line = undef), ($prev_line = $_), undef ) EOT # C++ class. #$Peg_zz{c} = '/^class\s+\w+/ and not /;/'; # Java: method & class context. $Peg_z{j} = '/^\s*(?:\w+\s+)*(\w+)\s*\(.*\)\s*(\{|throws|$)/ and ' . '$1 !~ /^(?:if|for|while|switch|catch|synchronized)$/ and (s/^\s+//, 1)'; $Peg_zz{j} = '/^\s*(?:\w+\s+)*class/'; # Perl subroutines & POD. $Peg_z{p} = '/^(?:\s*sub\s+\w|=head|__(?:END|DATA)__)/'; # Tcl. $Peg_z{t} = '/^\s*(?:proc|namespace)\b/'; ################################################################################ # n2file() - given a FILENO returns the corresponding list of matching files. # Handles: (a) 22 (b) -1 (c) 1,2,3 (d) 1-3 (e) 1 2 3 (f) 1..3 (g) 1-2,3 etc. { my (@matches, $matches_are_fullpaths); sub n2file { my $get_fullpaths; # Default to relative paths, but: if (ref $_[0]) { # n2file(\0, ...) := return full paths $get_fullpaths = 1; shift; } if ($get_fullpaths xor $matches_are_fullpaths) { $matches_are_fullpaths = $get_fullpaths; @matches = (); } unless (@matches) { @matches = last_matches($get_fullpaths) or die "no matches found"; } my @n; foreach my $fileno (@_) { foreach my $r (split /[,\s]+/, $fileno) { # Assume "22-" or "22.." indicates 'to the end'. $r .= "0" if $r =~ /^\d+(?:-|\.\.)$/; if ($r =~ /^(\d+)(?:-|\.\.)(\d+)$/) { my ($from, $to) = ($1, $2); # Assume "44-7" means "44-47". if ($from >= 10 and $to <= 9 and $from =~ /(\d)$/ and $to > $1) { $to += $from - ($from % 10); } # Assume "22..0" means from 22 to the end. if ($to == 0) { $to = @matches; } die "bad range: $r" if $from > @matches or $from > $to; $to = @matches if $to > @matches; push @n, $from..$to; } elsif ($r =~ /^-?\d+$/) { push @n, $r; } else { die "bad fileno: $fileno"; } } } die "no FILENO found" unless @n; my @files; foreach my $n (@n) { my $idx = $n == 0 ? 0 : $n > 0 ? $n - 1 : $n; die "fileno $n out of range" if $idx >= @matches or $idx < -@matches; push @files, $matches[$idx]; } return wantarray ? @files : $files[0]; } } ################################################################################ # Convert a 'PERLEXPR' to a Perl expression. sub make_expr { my $pe = shift; unless ($pe =~ m{^[\+\$]|/}) { $pe = "/" . $pe . "/"; } eval "if (0 and ($pe)) {}"; $@ and die "bad PERLEXPR: $pe\n$@"; return $pe; } ################################################################################ =head2 B<--opt [LONGOPT]> Show help for peg longopts. If a B is specified then just the documentation for that longopt is shown; otherwise all the longopts are displayed along with their first line of POD. It assumes that longopts are defined in the following way: =head2 B<--opt-name> A brief one line description. More detailed description here eg. B<--opt-name> does I. etc. etc. =cut # Immediately followed by its definition. $Peg_longopt{'opt-name'} = sub { my ($argv_ref, $files_ref) = @_; # ... }; If the B<-V> verbose option is also used, then the Perl code for the longopt is also shown. =cut $Peg_longopt{opt} = sub { my $argv_ref = shift; if (@$argv_ref and $argv_ref->[0] eq '-V') { # Handle leading -V here. ++$Verbose; shift @$argv_ref; } my $opt = shift @$argv_ref; if (@$argv_ref and $argv_ref->[0] eq '-V') { # Handle trailing -V here. ++$Verbose; shift @$argv_ref; } # Build up hashes containing the POD and code for all the longopts defined # in the ini files. This assumes a consistent coding style! my (%pod, %code); foreach my $f (@Ini_files) { open my $fin, "<", $f or die "can't open $f: $!"; while (<$fin>) { if (/^=head2 B<--?([\w-]+)/) { my $o = $1; { do { push @{ $pod{$o} }, $_; last if /^=cut/; } while (<$fin>) } } if (/^\$Peg_longopt\{['"]?([\w-]+)/) { my $o = $1; { do { push @{ $code{$o} }, $_; last if /^(\$Peg_longopt.*)?\};$/; } while (<$fin>) } } } } if ($opt) { $opt =~ s/^--?//; die "no documentation found for '$opt'" unless exists $pod{$opt}; print "\n", pod2txt(join '', @{$pod{$opt}}); print "\n# Perl code =>\n\n", @{$code{$opt}} if $Verbose; } else { print "\n# Peg longopts =>\n\n"; foreach my $opt (sort keys %Peg_longopt) { next if $opt =~ /^help$/; # skip peg's builtin longopts. my $dots = '.' x (12 - length($opt)); my $descr = exists $pod{$opt} ? ${$pod{$opt}}[2] : ''; $descr =~ s/\015?\012\z//; # chomp_ $descr =~ s/\b[A-Z]<([^>]+)>/$1/g; # remove POD escapes. print " $opt $dots $descr\n"; } } exit; }; # Format POD into raw text. sub pod2txt { my $txt = shift; require Pod::PlainText; my $parser = Pod::PlainText->new(indent => 4, sentence => 0, width => 72); open(my $txt_fh, "<", \$txt) and open(my $out_fh, ">", \my $out_txt) or die "can't open: $!"; $parser->parse_from_filehandle($txt_fh, $out_fh); $out_txt =~ s/\015?\012\z//; # chomp_ return $out_txt; } ################################################################################ =head2 B<--find FINDARG> Find files matching the given argument. If the FINDARG is a simple string then files whose tail matches it are printed. Otherwise the FINDARG is taken as a PERLEXPR passed to B<-p>. For example, C, C or C. =cut $Peg_longopt{find} = sub { my $argv_ref = shift; @$argv_ref or die "expected TAILMATCH or /PATTERN/ argument"; my $p_arg = shift @$argv_ref; if ($p_arg =~ /^[\w\.\-]{2,}/) { $p_arg = "m," . quotemeta($p_arg) . "\[^\\\\/]*\$,i"; } Warn "-l +1 -p $p_arg"; unshift @$argv_ref, '-Y,p', '+1', '-ddlnp', $p_arg; }; ################################################################################ =head2 B<--pager> Pipe output thro a pager. This can be disabled with either B<--nopager> or B<--pagerx>. =cut $Peg_longopt{pager} = sub { my $argv_ref = shift; return if ($::Already_paging or grep /^--?(nopager|pagerx)$/, @$argv_ref or ! -t STDOUT); $::Already_paging = 1; unshift @$argv_ref, '-##'; my $less; foreach my $f ("C:/cygwin/bin/less.exe", "/usr/bin/less") { if (-x $f) { $less = $f; last; } } defined $less or die "failed to find a 'less' pager"; # less options: # -m = long-prompt. Shows "byte 1234" instead of ":". # -F = Quit if entire file fits on first screen. # -R = Output "raw" control characters. # -X = Don't use termcap init/deinit strings. open(PAGER_OUT, '|-', "$less -mFRX") or die "unable to pipe STDOUT via less: $!\n"; *STDOUT = \*PAGER_OUT; *STDERR = \*PAGER_OUT; }; ################################################################################ =head2 B<--pagerx> Option to comment out --pager on the cmdline. =cut $Peg_longopt{pagerx} = sub {}; ################################################################################ =head2 B<--loop PERLCODE> Run some I code for each previously matched file. The following Perl variables are defined: $_ filename $f filename $b backslashed version of filename $d directory $e escaped version of filename eg. "a/b c/Copy of d.pl" -> "a_b_c_Copy_of_d.pl" $E escaped version of filename in same directory eg. "a/b c/Copy of d.pl" -> "a/b c/Copy_of_d.pl" $t tail of filename eg. "Copy of d.pl" =cut $Peg_longopt{loop} = sub { my $argv_ref = shift; my $code = shift @$argv_ref; ($code and !@$argv_ref) or unshift(@$argv_ref, '--opt', 'loop'), return; $code =~ /\bunlink[^\(]/ and die "unlink? Use unlink(...) to override"; foreach my $f (last_matches()) { (my $b = $f) =~ tr|/|\\|; (my $d = $f) =~ s|(/)?[^/]+$| $1 ? '' : '.' |e; (my $e = $f) =~ s|[^\w\.\-]|_|g; (my $t = $f) =~ s|^(.*\/)||; my $Ed = $1 || ''; (my $Et = $t) =~ s|[^\w\.\-]|_|g; my $E = "$Ed$Et"; $_ = $f; print "\n=> $f\n"; no strict; # ??? eval $code; $@ and die "error with code: $code\n", $@; } exit; }; ################################################################################ =head2 B<--edit FILENO ...> Edit some of the last matched files. For example, C will edit the first and last matched files. =cut $Peg_longopt{edit} = sub { my $argv_ref = shift; # XXX This is user specific... edit as necessary. my $editor = $Is_Win32 ? 'C:/Program Files/Crimson Editor/cedt.exe' : 'gedit'; my $amp = $Is_Win32 ? '' : '&'; # run in background my @files = n2file(@$argv_ref); my %done; foreach my $file (@files) { next if $done{$file}++; my $size = -s $file; if ($size > 10_000_000) { Warn "file too large $file: $size"; next; } $file =~ tr|/|\\| if $Is_Win32; print "# $file\n"; system "\"$editor\" \"$file\" $amp"; } exit; }; ################################################################################ =head2 B<--vim FILENO> Open one of the last matched files in vim. =cut $Peg_longopt{vim} = sub { my $argv_ref = shift; @$argv_ref or die "expected FILENO"; my @files; foreach my $file (n2file(@$argv_ref)) { if ($Is_Win32 and $file =~ m|^/|) { # Need to add drive. XXX not sure why this is necesary. $Start_dir =~ m|^([a-z]:)/|i or die; my $drive = $1; $file =~ s|^/|$drive/| or die; } $file = "\"$file\"" if $file =~ /\s/; print "# $file\n"; push @files, $file; } system "vim -- " . join " ", @files; exit; }; ################################################################################ =head2 B<--duplicates> Find duplicate files. =cut $Peg_longopt{duplicates} = sub { my $argv_ref = shift; $::Sort_lengthwise = 0; if (@$argv_ref and $argv_ref->[0] eq '-lengthwise') { $::Sort_lengthwise = 1; shift @$argv_ref; Warn "sort duplicates filename lengthwise"; } unshift @$argv_ref, ( '-\ddR_%tt', '-PP' => q[ # Ensure that if there are no duplicates then "peg -=" does # not report the previous set of matches. BEGIN { if (!$::Do_this_only_once++) { open F, ">", $Last_matches_file or die; print F "no duplicates found\n"; close F; } }; # PEG_NO_FORK push @{$Z->{cksum($File)}}, $File; return; ], '-PPPP' => q[ my @duplicates; foreach my $cksum (keys %{$Z}) { my @dups = @{$Z->{$cksum}}; if (@dups > 1) { my $first = 1; @dups = sort { length($b) <=> length($a) } @dups if $::Sort_lengthwise; foreach my $duplicate (@dups) { print $duplicate, "\n"; push @duplicates, $duplicate unless $first; $first = 0; } print "\n\n"; } } @Matched_files = @duplicates; ], '-e' => '+die("should not see this")', ); }; ################################################################################ =head2 B<--ifdef> Get full C/C++ #if context. =cut $Peg_longopt{ifdef} = sub { my $argv_ref = shift; # Turn on both context matchers, but don't match. # We then set the #ifdef context into $Context_line2 using -P code. unshift @$argv_ref, "-z", "+0", "-zz", "+0"; $Env{PEG_CONTEXT_FORMAT2} = '$_'; $Env{PEG_Z_INDEPENDENT} = 1; unshift @$argv_ref, "-PPPPP", <<'EOT'; @::Cxt = (); EOT unshift @$argv_ref, "-P", <<'EOT'; # PEG_NEWLINE_NEUTRAL # Notes. # * some compilers allow whitespace preceding the '#' in preprocessor lines. # * does not handle backslash extended lines. if (/^\s*\#/) { my $new_cxt = 1; if (/^\s*\#\s*if(n?def)?\b/) { push @::Cxt, [$_, $.]; } elsif (/^\s*\#\s*elif\b/) { $::Cxt[$#::Cxt] = [$_, $.]; } elsif (/^(\s*\#\s*else)\b/) { my $else_line = $1; if (@::Cxt) { my $if_line = $::Cxt[$#::Cxt]->[0]; if ($if_line !~ /^\s*\#\s*elif/) { $if_line =~ s/[\n\r\t ]+\z//; $else_line = "$else_line /* $if_line */$Newline"; } else { $else_line = $_; } $::Cxt[$#::Cxt] = [$else_line, $.]; } else { # Found a #else before seeing a #if ! $new_cxt = 0; } } elsif (/^\s*\#\s*endif\b/) { pop @::Cxt; } else { $new_cxt = 0; } # Context_lineno2 is set to ensure correct ordering (handled by peg). if ($new_cxt) { if (@::Cxt) { $Context_line2 = ''; for (@::Cxt) { # trim trailing whitespace, and use native newline $_->[0] =~ s/[ \t\r\n]+\z//; $_->[0] .= $Newline; } # Minimize padding to ensure #'s aligned. my $max_lineno_len = 1; foreach my $cxt_elem (@::Cxt) { my (undef, $lineno) = @$cxt_elem; my $len = length $lineno; $max_lineno_len = $len if $len > $max_lineno_len; } foreach my $cxt_elem (@::Cxt) { my ($line, $lineno) = @$cxt_elem; my $pad = ' ' x (1 + $max_lineno_len - length($lineno)); $line =~ s/^\s+//; $Context_line2 .= "#### ($lineno)$pad$line"; } $Context_lineno2 = $.; } elsif ($Printed_Context_line2) { $Context_line2 = "#### *none*$Newline"; $Context_lineno2 = $.; } else { $Context_line2 = undef; } if (defined $Printed_Context_line2 and defined $Context_line2 and $Context_line2 eq $Printed_Context_line2) { # Ensure we don't reprint the same context eg. # #if CXT # ...match1 # #if SOMETHINGELSE # #endif # ...match2 // do not repeat CXT # $Context_line2 = undef; } } } EOT }; ################################################################################ =head2 B<--checkindent> Print lines that are not correctly I indented. NB. stops processing after a C<__END__> or C<__DATA__>. XXX this is coding-style & language specific. XXX this breaks the context options B<-ABC>. =cut $Peg_longopt{checkindent} = sub { my $argv_ref = shift; unshift @$argv_ref, '-nT', '-P' => 'last if /^__(?:END|DATA)__/;', # don't check tabs within POD '-P' => 'next if /^\t*( )?#+\t*( )?\S[^\t]*$/;', # ignore indented comments '-P' => 'next if /^\t*( )? \*( |\/|$)/;', # indented C comment '-e' => '/\S/ and not /^\t*( )?\S/', # bad leading whitespace '-e' => '/\S.*\t/', # a tab mid line '-e' => '/[ \t]$/', # trailing whitespace ; }; ################################################################################ =head2 B<--cksum> Print SHA-1 file checksums. To print MD5 checksums, C. =cut $Peg_longopt{cksum} = sub { my $argv_ref = shift; my $cksum = 'cksum_sha1'; if (@$argv_ref and $argv_ref->[0] =~ /md5/i) { shift @$argv_ref; $cksum = 'cksum_md5'; } my $code = <<'EOT'; print CKSUM($Filepath), " ", $Col{filename}, $File, $Col_Reset, "\n"; push @Matched_files, $File; return; EOT $code =~ s/CKSUM/$cksum/ or die; unshift @$argv_ref, '-%de', '+die("should not see this!")', '-PP' => $code; }; ################################################################################ =head2 B<--bsl> Process backslashed lines as one. =cut $Peg_longopt{bsl} = sub { my $argv_ref = shift; unshift @$argv_ref, '-P' => <<'EOT'; # PEG_SAFE_BEFORE_CONTEXT if (defined $orign) { $. = 1 + $orign; $orign = undef } if (/\\$/) { $startn = $. unless defined $l; $l .= $_; next } if (defined $l) { $_ = $l . $_; $orign = $.; $. = $startn; $l = undef } EOT }; ################################################################################ =head2 B<--pod> Only search B. =cut $Peg_longopt{pod} = sub { my $argv_ref = shift; unshift @$argv_ref, '-P' => <<'EOT'; next unless /^=[a-z]/ .. /^=cut/; # POD can start with head1/item/pod etc. EOT }; ################################################################################ =head2 B<--ipc> Ignore Perl comments & POD. =cut $Peg_longopt{ipc} = sub { my $argv_ref = shift; unshift @$argv_ref, '-P' => <<'EOT'; next if /^\#/; next if /^=[a-z]/ .. /^=cut/; # POD can start with head1/item/pod etc. last if /^__(?:END|DATA)__/; s/(? Ignore C comments. XXX not 100% accurate... but works in the typical cases. Needs a lexer style solution to handle cases such as C<"a /* comment in a string ">. =cut $Peg_longopt{icc} = sub { my $argv_ref = shift; unshift @$argv_ref, '-PPPPP' => <<'EOT'; $In_comment = 0; EOT unshift @$argv_ref, '-P' => <<'EOT'; if ($In_comment) { if (s|^.*?\*/||) { $In_comment = 0; } else { next; } } s|/\*.*?\*/||g; # /* ... */ s|//.*$||; # // ... if (s|/\*.*||) { $In_comment = 1; # NB. still search non comment part of line. } EOT }; ################################################################################ =head2 B<--tag> Print a I for each match that can be used by B<--tagv>. Each matched line is prefixed with a tag consisting of alphabetic characters. This tag can then be passed to B<--tagv> to view the matched line in F. Use B<--notag> to override this. =cut my $tagfile = $HOME_dir . ".peg_tags"; $Peg_longopt{tag} = sub { my $argv_ref = shift; return if grep /^--?(notag|tagv)$/, @$argv_ref; # cf. peg -tag foo -tagv a return if $::Tag; # guard against "peg --tag --tag ..." unshift @$argv_ref, "-PP" => "\n\t# PEG_NO_FORK\n"; # since $::Tag needs to be global! open TAGFILE, ">", $tagfile or die "can't write to $tagfile: $!"; eval "END { close TAGFILE }"; if (grep m|\bpager\b|, @ARGV) { select((select(\*TAGFILE), $| = 1)[0]); # autoflush } print TAGFILE cwd(), "\n"; # first line is the cwd. $::Tag = 'a'; # NB. tags may have gaps if -oo is used. $Code_on_match2 = <<'EOT'; BEGIN { local $_ = 'x'; colorall('X', 'lm'); $::Tagcol = $Col{'lm'} }; # hack my $tag = $::Tag++; print TAGFILE "$tag:$.:$File\n"; print $::Tagcol, $tag, ':', $Col_Reset; EOT }; ################################################################################ =head2 B<--tagv TAG> View a tagged line in F. See B<--tag>. =cut $Peg_longopt{tagv} = sub { my $argv_ref = shift; my $tag = shift @$argv_ref or die "expected TAG argument"; $tag =~ s/:$//; $tag =~ /^[a-z]+$/ or die "wonky tag argument: $tag"; open my $fin, "<", $tagfile or die "can't open $tagfile: $!"; my $cwd = <$fin>; chomp $cwd; my ($file, $lineno); while (<$fin>) { if (/^$tag:/og) { /(\d+):(.+)/g or die "unexpected tag file format: $_"; ($lineno, $file) = ($1, $2); last; } } die "match not found for $tag" unless $file; unless ($file =~ m|^(\w:)?[\\\/]|) { $file = $cwd . $file; # NB. cwd ends in a slash } close $fin; print "# ($lineno) $file\n"; system "vim +$lineno \"$file\""; exit; }; ################################################################################ =head2 B<--and PERLEXPR> Only test lines matching PERLEXPR. =cut $Peg_longopt{'and'} = sub { _andnot(1, @_) }; ################################################################################ =head2 B<--not PERLEXPR> Do not test lines matching PERLEXPR. It is exactly equivalent to C<--and !(PERLEXPR)>. =cut $Peg_longopt{'not'} = sub { _andnot(0, @_) }; ################################################################################ sub _andnot { my $and = shift; my $argv_ref = shift; @$argv_ref or die "expected PERLEXPR"; my $pe = shift @$argv_ref; $pe = make_expr($pe); push @Perlexpr_mung, sub { my $perlexpr_ref = shift; # NB. the order of expressions below ensures it is # the original PERLEXPR that gets colored. $$perlexpr_ref = $and ? "($pe) and ($$perlexpr_ref)" : "!($pe) and ($$perlexpr_ref)"; }; } ################################################################################ =head2 B<--fork> Set B eg. C This can be used to test different values for B. Also enables B<-%> since the main use for this option is to time different forking parameter values. =cut $Peg_longopt{'fork'} = sub { my ($argv_ref, $files_ref) = @_; my $r_fork = shift @$argv_ref or die "expected PEG_R_FORK argument"; $r_fork =~ /^\d(,\d{1,2})?$/ or die "bad PEG_R_FORK argument"; $Env{PEG_R_FORK} = $r_fork; Warn "--fork $r_fork"; unshift @$argv_ref, '-%'; }; ################################################################################ =head2 B<--idir DIR> or B<--idir DIR1:DIR2:...> Exclude the given directory names from being searched. Adds the given directory names to C<@Exclude_dirs>. =cut $Peg_longopt{'idir'} = sub { my ($argv_ref, $files_ref) = @_; @$argv_ref or die "expected DIR list"; my @dir_names = split /:+/, shift @$argv_ref; if (grep /[\\\/]/, @dir_names) { die "directory paths not supported; use -p instead"; } push @Exclude_dirs, @dir_names; }; ################################################################################ $Consider_ctime = 1; =head2 B<--mtimeonly> Only consider mtime (and not ctime) when using B<-M>. XXX Must be specified I B<-M> on the command line. =cut $Peg_longopt{'mtimeonly'} = sub { $Consider_ctime = 0; }; ################################################################################ $Skip_dot_files = 1; =head2 B<--dot> Don't ignore dot files/directories. =cut $Peg_longopt{'dot'} = sub { $Skip_dot_files = 0; }; ################################################################################ =head2 B<--follow> Follow symbolic links to directories (when using B). This ignores possible repetitions of directories. For instance, given the directories: a_link1 -> b_dir b_dir c_link2 -> b_dir Then only files beneath F will be processed (since it came first). =cut $Peg_longopt{'follow'} = sub { $Env{PEG_QFIND_ARGS} .= ' -l'; }; #$Peg_longopt{'followx'} = sub {}; ################################################################################ =head2 B<--followall> Follow B symbolic links to directories (when using B). Except where this leads to an infinite loop. =cut $Peg_longopt{'followall'} = sub { $Env{PEG_QFIND_ARGS} .= ' -L'; }; #$Peg_longopt{'followallx'} = sub {}; ################################################################################ =head2 B<--ccode> Strips C comments and string literals. =cut $Peg_longopt{'ccode'} = sub { my ($argv_ref, $files_ref) = @_; unshift @$argv_ref, '-PPPPP' => <<'EOT'; $In_comment = 0; EOT unshift @$argv_ref, '-P' => <<'EOT'; if ($In_comment) { if (s|^.*?\*/||) { $In_comment = 0; } else { next; } } s|/\*.*?\*/||g; # /* ... */ s|//.*$||; # // ... if (s|/\*.*||) { $In_comment = 1; # NB. still search non comment part of line. } s/\"(?:\\.|[^\"])*\"//g; # "C \"style\" string". EOT }; ################################################################################ =head2 B<--depth> Set B's depth argument. =cut $Peg_longopt{'depth'} = sub { my ($argv_ref, $files_ref) = @_; die "expected integer argument" unless @$argv_ref; my $depth = shift @$argv_ref; die "not an integer: $depth" unless $depth =~ /^[0-9]+$/; $Env{PEG_QFIND_ARGS} .= " -E=$depth"; }; ################################################################################ # Optimization: compile -Q code only if necesary. eval <<'EOT' if (grep /^-.*[QD]/, @ARGV); $@ and die $@; sub process_tar_slow { my ($file, $fullpath) = @_; my $cmd = "tar -tf \"$file\""; Warn "running $cmd" if $Verbose; my @filelist = `$cmd`; if ($? # Heuristic - seen "tar -tf" give correct results AND error code! and @filelist < 3 ) { Warn "failed to get file list from $fullpath: $?", @filelist; return 0; # signal to process the file as usual } foreach my $f (@filelist) { $f =~ s/\015?\012\z//; next if $f =~ m|/$|; # skip directory names next unless pp($f); $cmd = qq(tar -xOf "$file" "$f"); Warn "running $cmd" if $Verbose; open(my $fh, "$cmd|") or Die "can't extract $f from $fullpath: $!"; Q($fh, "$fullpath # $f", 1); close $fh; } return 1; } # process_tar_slow sub process_tar_fast { my ($file, $fullpath) = @_; my $cmd = "tar -xOf \"$file\""; my $fh; Warn "running $cmd" if $Verbose; if (!open($fh, "$cmd|")) { Warn "can't extract $fullpath: $!"; return 0; } Q($fh, $fullpath); close $fh; return 1; } # process_tar_fast # Process the contents of a .tar.gz file by file. sub process_targz_slow { my ($file, $fullpath) = @_; require File::Temp; my ($fh, $tempfile) = File::Temp::tempfile ("peg-targz-XXXXX", SUFFIX => '.tar', UNLINK => 1); close $fh; my $cmd = qq(gzip -dc "$file" > "$tempfile"); Warn "running $cmd" if $Verbose; system $cmd and Die "error: $cmd: $?"; process_tar_slow($tempfile, $fullpath); unlink $tempfile; return 1; } # process_targz_slow # Process the contents of a .tar.gz as one entity. sub process_targz_fast { my ($file, $fullpath) = @_; my $cmd = qq(gzip -dc "$file" | tar -xOf -); Warn "running $cmd" if $Verbose; my $fh; if (!open($fh, "$cmd|")) { Warn "can't extract $fullpath: $!"; return 0; } Q($fh, $fullpath); close $fh; return 1; } # process_targz_fast # Process each individual file within a ".zip" file. sub process_zip_slow { my ($file, $fullpath) = @_; my $cmd = "unzip -Z1 \"$file\" 2>&1"; Warn "running $cmd" if $Verbose; my @filelist = `$cmd`; if ($?) { Warn "unzip failed with $fullpath: $?", @filelist; return 0; # signal to process the file as usual } Warn "zip contains @{[ scalar @filelist ]} files" if $Verbose; foreach my $f (@filelist) { $f =~ s/\015?\012\z//; next unless pp($f); my $cmd = qq(unzip -p "$file" "$f"); Warn "running $cmd" if $Verbose; open(my $fh, "$cmd|") or Die "can't extract $f from $fullpath: $!"; Q($fh, "$fullpath # $f", 1); close $fh; } return 1; } # process_zip_slow # Process the entire contents inside a ".zip" file as one. sub process_zip_fast { my ($file, $fullpath) = @_; my $cmd = qq(unzip -p "$file"); Warn "running $cmd" if $Verbose; open(my $fh, "$cmd|") or Die "can't unzip $fullpath: $!"; Q($fh, $fullpath); close $fh; return 1; } # process_zip_fast sub process_gz { my ($file, $fullpath) = @_; my $cmd = qq(gzip -dc "$file"); Warn "running $cmd" if $Verbose; open(my $fh, "$cmd|") or Die "error: $cmd: $!"; Q($fh, $fullpath); close $fh; return 1; } # process_gz sub process_pdf { my ($file, $fullpath) = @_; require File::Temp; my ($fh, $tempfile) = File::Temp::tempfile ("peg-pdf-XXXXX", SUFFIX => '.pdf', UNLINK => 1); close $fh; my $cmd = "pdftotext \"$file\" $tempfile"; Warn "running $cmd" if $Verbose; system $cmd; if ($?) { Warn "pdftotext failed: $?"; unlink $tempfile; return 0; } unless (open($fh, "<", $tempfile)) { Warn "could not open $tempfile: $!"; unlink $tempfile; return 0; } Q($fh, $fullpath); close $fh; unlink $tempfile; return 1; } # process_pdf sub process_tar { return process_tar_slow(@_) if pp(); Warn "use -pp /./ to search each file within the tar file" unless $::Done_use_pp_warning++; return process_tar_fast(@_); } # process_tar sub process_targz { return process_targz_slow(@_) if pp(); Warn "use -pp /./ to search each file within the tar.gz file" unless $::Done_use_pp_warning++; return process_targz_fast(@_); } # process_targz sub process_zip { return process_zip_slow(@_) if pp(); Warn "use -pp /./ to search each file within the zip file" unless $::Done_use_pp_warning++; return process_zip_fast(@_); } # process_zip %Peg_Q = ( 'pdf' => \&process_pdf, '*gz' => \&process_gz, '*tar' => \&process_tar, '*tar.gz' => \&process_targz, '*zip' => \&process_zip, ); EOT ################################################################################ sub mv { @_ == 2 or die "Usage: mv(SRC, DEST)\n"; my ($src, $dest) = @_; defined $src or die "mv: undefined SRC\n"; defined $dest or die "mv: undefined DEST\n"; -f $src or die "mv: SRC does not exist: $src\n"; -f $dest and die "mv: DEST exists: $dest\n"; # NB. DEST may be a DIR require File::Copy; File::Copy::move($src, $dest) or die "mv: failed: $!\n"; } # mv sub cp { @_ == 2 or die "Usage: cp(SRC, DEST)\n"; my ($src, $dest) = @_; defined $src or die "cp: undefined SRC\n"; defined $dest or die "cp: undefined DEST\n"; -f $src or die "cp: SRC does not exist: $src\n"; -f $dest and die "cp: DEST exists: $dest\n"; # NB. DEST may be a DIR require File::Copy; File::Copy::copy($src, $dest) or die "cp: failed: $!\n"; } # cp # Provide a checksum subroutine: sub cksum { @_ == 1 or die "Usage: cksum(FILE)"; return cksum_sha1(@_); } sub cksum_sha1 { @_ == 1 or die "Usage: cksum_sha1(FILE)"; return do_cksum("SHA-1", @_); } sub cksum_md5 { @_ == 1 or die "Usage: cksum_md5(FILE)"; return do_cksum("MD5", @_); } sub do_cksum { require Digest; my ($type, $file) = @_; open my $fin, "<", $file or return "cksum: can't open $file: $!"; binmode $fin; my $ctx = Digest->new($type); $ctx->addfile($fin); my $cksum = $ctx->b64digest(); close $fin; return $cksum; } # do_cksum ################################################################################ # # A Win32 optimized version of File::Find::find. # if ($Is_Win32 and 1 and grep /^-.*[dt]/, @ARGV) { eval <<'EOT'; $INC{'File/Find.pm'} = __FILE__; # makes "require File::Find" a NOP. $File::Find::Mtime = 0; # ensure defined sub File::Find::find { my ($wanted, @dirs) = @_; my $callback = $wanted->{wanted}; my $silent = $wanted->{silent}; my $pp = $wanted->{preprocess}; for (@dirs) { # Ensure there is a trailing "/" on all directory names. $_ .= '/' unless m|[\\/]$| or ($Is_Win32 and /^[a-z]:$/); } @dirs = reverse @dirs; my (@d, @f, %M); while (defined (my $dir = pop @dirs)) { opendir my $dirh, $dir or ($silent || print STDERR "peg: can't opendir $dir: $!\n"), next; @d = @f = %M = (); $dir =~ s|^\.[/\\]||; while (defined (my $f = readdir $dirh)) { next if ($f eq '.' or $f eq '..'); if (-d "$dir$f") { push @d, $f; } else { push @f, $f; $M{$f} = _M(); # NB. respect $::Consider_ctime. } } closedir $dirh; if (@f) { @f = $pp->(@f) if $pp; foreach my $f (@f) { $File::Find::name = $_ = "$dir$f"; $File::Find::Mtime = exists $M{$f} ? $M{$f} : 0; $callback->(); # allow errors to propagate to caller. } } if (@d) { @d = $pp->(@d) if $pp; push @dirs, reverse map "$dir$_/", @d; } } $File::Find::Mtime = 0; } EOT die $@ if $@; } ################################################################################ # Avoid "used only once" warnings. 1 or ($File::Find::name, $File::Find::name);