#!/usr/local/bin/perl5
############################################################################
#
# diastxt2 by Ron Starr
#
#
# Peforms diastic reading of a text following method outlined in
# Jackson Mac Low, _The Virginia Woolf Poems_. Inspired by 
# Charles O. Hartman's original DIASTEXT.
#
# Program does no error checking--you're on your own.
#
#
# Command-line options:
#
#	-l <number>	Max words per line of output (default 6)
#	-o <number>	Max words of output (default 25)
#
# Program reads from standard input. Input has the following structure:
#	
#	1st line - key phrase (Mac Low calls it the "title phrase")
#	2nd and succeeding lines - the text to be read
#
#	All output is to standard output.
#
#
# Revision History
#	06/30/99	First version
#	07/01/99	Added max words per line of output
#	07/14/99	Added command-line options 
#	08/28/99	Bug fix - leading blanks in key treated as word
#
############################################################################
 
use Getopt::Std;
getopts ("l:o:");

# Set the max words of output.
$MAXWORDS = ($opt_o)? $opt_o : 25;

# Set the maximum number of words per line of output
$MAXPERLINE = ($opt_l)? $opt_l : 6;

#
# first line in file is the key phrase
#
# process the line containing the key phrase into search patterns
#
$keyline = <>;

chop $keyline;					# get the line and whack newline
$keyline =~ s/[,;:.\?\"\']/ /g;			# convert all punctuation to blanks
$keyline =~ s/^ *//g;				# remove leading blanks (8/28 bug fix)
$keyline =~ s/ +/ /g;				# convert multiple to single blanks
@keywords = split (/ /, $keyline);		# split line into words on blanks

#
# generate the search patterns from the key words
#
$numpat=0;
for ($i=0; $i<=$#keywords; $i++) {

	@letters = split (/ */, $keywords[$i]);	# split each keyword into its letters
	$patterns[$numpat++] = "^$letters[0]";	# first is special case
	for ($j=1; $j <=$#letters; $j++) {	# generate a pattern for each letter
		$patterns[$numpat++] = "^\\w{$j}$letters[$j]";
	}
}


#
# pull in the rest of the text
#
while(<>) {
	$text = $text . $_ ;
}

#
# regularize whitespace and newlines in order to split text into words
#
$text =~ s/\s+\n/\n/g;				# eliminate whitespace before newline
$text =~ s/\n\s+/\n/g;				# eliminate whitespace after newlines
$text =~ s/\n/\n /g;				# mutate embedded newlines (all by now) 
						# into nl + blank
$text =~ s/ +/ /g;				# eliminate any multiple blanks...
@textwords = split (/ /, $text);		# split it into "words" on blanks

#
# now, do the diastic reading, for a fixed number of output words....
#

$numpat = $numword = 0;				# start with the first pattern & word
$wordsout = 0;					# words printed on line counter

for ($i = 0; $i < $MAXWORDS; $i++) {

	$wordschecked=0;			# count the words checked 
						# to kick out if pattern doesn't exist
	while() {
		$testtext = $textwords[$numword];
		$testtext =~ s/^\W+//;		# whack anything that isn't a word part
	
		if ($testtext =~ /$patterns[$numpat]/i) {
			# pattern found....print the word and decide
			# whether to output a newline
			print $textwords[$numword];			
			# kick out newline if punctuation char ends string
			$testtext .= "&&";      # tack characters on end to force recognition of \n at end
						# this is a hack around DOS perl 4 "bug"
			if($testtext =~ /([\W\n])&&$/m) {
				if ($1 ne "\n") { print "\n"}
				$wordsout = 0;
			}	
			else {
				print " ";	# otherwise, add a blank
				$wordsout++;
				if($wordsout >= $MAXPERLINE) {
					print "\n"; $wordsout = 0;
				}
			} 
			$numword = ($numword < $#textwords)? $numword + 1 : 0; 
			last;
		}
		else {
			# pattern not found. bump the counter and test to see
			# if we've been all the way through the text.
			$wordschecked++;
			if ($wordschecked > $#textwords +1) {
				print "[pattern not found: $patterns[$numpat], skipping]\n";
				last;
			} 
			$numword = ($numword < $#textwords)? $numword + 1 : 0;
		}
	}
	$numpat = ($numpat < $#patterns)? $numpat + 1 : 0; 
}
print "\n";

exit (0);