#!/usr/bin/perl -w
require 5;
use strict;
# Pin the two environment variables a shell consults when it runs a command
# (this script reads its index through a gzip pipe): a fixed search path and
# the default field separators (space, tab, newline).
@ENV{qw(PATH IFS)} = ('/usr/local/bin:/bin:/usr/bin', " \t\n");

# Grab paragraphs from rfc-index matching pattern.
# It's pretty cheap right now.
# BEGIN { unshift @INC, "/home/kragen/public_html"; }
use lib '/home/kragen/public_html';
use CGI;

# Parse the request.
#   $searching       - true when a 'pattern' parameter was submitted at all
#                      (even an empty one).
#   $pattern         - regex source text used to match index entries.
#   $escaped_pattern - what the visitor typed, HTML-escaped for display.
#   $title           - page title / heading text.
my $query = CGI->new;
my $searching = defined $query->param('pattern');

my ($pattern, $escaped_pattern);
if ($searching) {
	my $typed = $query->param('pattern');

	# Escape the text as typed, before quotemeta gets at it; otherwise a
	# literal search for "tcp/ip" would be reported as "tcp\/ip".
	$escaped_pattern = $query->escapeHTML($typed);

	# Unless a regular-expression search was requested, match the text
	# literally.
	$pattern = $query->param('regexp') ? $typed : quotemeta $typed;

	# Searches are case-insensitive unless case_sensitive=yes was sent.
	my $case_sensitive = $query->param('case_sensitive');
	$pattern = "(?i)$pattern"
		unless defined $case_sensitive and $case_sensitive eq 'yes';
}

my $title = $searching ? "RFCs matching $escaped_pattern" : "Search RFC index";

# CGI response header, terminated by the mandatory blank line.
print "Content-type: text/html\n\n";

# Top of the HTML page; $title is used both as the window title and as the
# visible heading.
print join("\n",
	"<html><head><title>$title</title></head>",
	'<body',
	'bgcolor="#ffffff" text="#000000" link="#0000ff" alink="#ff0000"',
	'vlink="#aa00aa">',
	"<h1>$title</h1>",
), "\n";

# Emit the search form, which submits back to this script with GET.
# The pieces are generated in document order, one print per row.
print $query->start_form(-action => $ENV{'SCRIPT_NAME'}, -method=>'GET'), "\n";
print "<p> <em>Last updated 02022-10-03</em>\n<p>";
print "Search pattern: ", $query->textfield('pattern'), "\n";
print $query->checkbox(
	-name  => 'case_sensitive',
	-label => 'Case sensitive',
	-value => 'yes',
), "\n";
# The regular-expression box is ticked by default.
print $query->checkbox(
	-name    => 'regexp',
	-label   => 'Regular Expression search',
	-value   => 'yes',
	-checked => 1,
), "\n";
print $query->submit, "\n";
print $query->end_form, "\n";

# Run the search: stream the gzip-compressed index one paragraph at a time,
# print every paragraph that matches the pattern, and append a link to
# rfc-editor.org for each format the entry lists.  Ends with a count of the
# matching entries.
if ($searching) {
	my $index_file = '/home/kragen/public_html/rfc-index-entries.txt.gz';
	my $records = 0;

	# Compile the visitor-supplied pattern once, inside eval, so that a
	# malformed expression produces a message in the page instead of
	# killing the script half-way through the response.
	# NOTE(review): a valid but pathological pattern can still burn a lot
	# of CPU; nothing here bounds the matching time.
	my $matcher = eval { qr/$pattern/ };
	my $index;

	if (!defined $matcher) {
		# Drop the trailing " at FILE line N." before showing the error.
		(my $why = $@) =~ s/ at \S+ line \d+\.?\s*\z//;
		print "<p><strong>Sorry, that is not a valid regular expression:</strong> ",
		      $query->escapeHTML($why), "\n";
	} elsif (!open($index, '-|', 'gzip', '-dc', $index_file)) {
		# List-form pipe open: gzip is run directly, with no shell involved.
		warn "Can't start gzip on $index_file: $!";
		print "<p><strong>Sorry, the RFC index can't be read right now.</strong>\n";
	} else {
		local $/ = "";  # paragraph mode: one blank-line-separated entry per read

		while (defined(my $entry = <$index>)) {
			next unless $entry =~ $matcher;

			# The entry starts with its (possibly zero-padded) RFC number.
			# Captures are only read inside a successful match, so a
			# failed match can never leak a stale $1 from an earlier one.
			my $number;
			if ($entry =~ /^\s*(\d+)/) {
				($number = $1) =~ s/^0*//;
			} else {
				warn "Can't find the number: $entry";
			}

			# Available formats, e.g. "(Format: TXT=1824 bytes, PS=1 bytes)"
			# or the older "(Format=.txt, .ps)".  Entries marked
			# "(Not online)" legitimately have none.
			my @formats;
			if ($entry =~ /\(Format(?::\s*|=)([^)]*)\)/) {
				@formats = split /,\s*/, $1;
			} elsif ($entry !~ /\(Not online\)/) {
				warn "Can't find the format: $entry";
			}

			# Only the upper-case style ("TXT", "PS=1 bytes") produces links;
			# the old ".txt" style never did, and still does not.
			if (defined $number) {
				foreach my $format (@formats) {
					next unless $format =~ /^([A-Z]+)/;
					my $ext = lc $1;
					$entry .= qq(<a href="http://www.rfc-editor.org/rfc/rfc$number.$ext">rfc$number.$ext</a> );
				}
			}

			# NOTE(review): the entry text is emitted as-is, without HTML
			# escaping -- confirm the index file never contains markup
			# characters that should be shown literally.
			print "<p>$entry\n";
			$records++;
		}

		# Closing a pipe also reaps gzip; a false return means it failed
		# (for example, the index file was missing or corrupt).
		close $index
			or warn "gzip -dc $index_file failed: ", ($! ? $! : "exit status $?");

		print "<hr> <p> Found $records RFCs.";
	}
}

# Page footer: a link to this script's own source, then close the document.
print qq{<a href="rfc-index.pl">(source code, public domain)</a>\n},
      "</body></html>\n";
