#!/usr/local/bin/perl5
############################################################################
#
# diastxt2 by Ron Starr
#
#
# Peforms diastic reading of a text following method outlined in
# Jackson Mac Low, _The Virginia Woolf Poems_. Inspired by
# Charles O. Hartman's original DIASTEXT.
#
# Program does no error checking--you're on your own.
#
#
# Command-line options:
#
# -l <number> Max words per line of output (default 6)
# -o <number> Max words of output (default 25)
#
# Program reads from standard input. Input has the following structure:
#
# 1st line - key phrase (Mac Low calls it the "title phrase")
# 2nd and succeeding lines - the text to be read
#
# All output is to standard output.
#
#
# Revision History
# 06/30/99 First version
# 07/01/99 Added max words per line of output
# 07/14/99 Added command-line options
# 08/28/99 Bug fix - leading blanks in key treated as word
#
############################################################################
use Getopt::Std;
getopts ("l:o:");
# Set the max words of output.
$MAXWORDS = ($opt_o)? $opt_o : 25;
# Set the maximum number of words per line of output
$MAXPERLINE = ($opt_l)? $opt_l : 6;
#
# first line in file is the key phrase
#
# process the line containing the key phrase into search patterns
#
$keyline = <>;
chop $keyline; # get the line and whack newline
$keyline =~ s/[,;:.\?\"\']/ /g; # convert all punctuation to blanks
$keyline =~ s/^ *//g; # remove leading blanks (8/28 bug fix)
$keyline =~ s/ +/ /g; # convert multiple to single blanks
@keywords = split (/ /, $keyline); # split line into words on blanks
#
# generate the search patterns from the key words
#
$numpat=0;
for ($i=0; $i<=$#keywords; $i++) {
@letters = split (/ */, $keywords[$i]); # split each keyword into its letters
$patterns[$numpat++] = "^$letters[0]"; # first is special case
for ($j=1; $j <=$#letters; $j++) { # generate a pattern for each letter
$patterns[$numpat++] = "^\\w{$j}$letters[$j]";
}
}
#
# pull in the rest of the text
#
while(<>) {
$text = $text . $_ ;
}
#
# regularize whitespace and newlines in order to split text into words
#
$text =~ s/\s+\n/\n/g; # eliminate whitespace before newline
$text =~ s/\n\s+/\n/g; # eliminate whitespace after newlines
$text =~ s/\n/\n /g; # mutate embedded newlines (all by now)
# into nl + blank
$text =~ s/ +/ /g; # eliminate any multiple blanks...
@textwords = split (/ /, $text); # split it into "words" on blanks
#
# now, do the diastic reading, for a fixed number of output words....
#
$numpat = $numword = 0; # start with the first pattern & word
$wordsout = 0; # words printed on line counter
for ($i = 0; $i < $MAXWORDS; $i++) {
$wordschecked=0; # count the words checked
# to kick out if pattern doesn't exist
while() {
$testtext = $textwords[$numword];
$testtext =~ s/^\W+//; # whack anything that isn't a word part
if ($testtext =~ /$patterns[$numpat]/i) {
# pattern found....print the word and decide
# whether to output a newline
print $textwords[$numword];
# kick out newline if punctuation char ends string
$testtext .= "&&"; # tack characters on end to force recognition of \n at end
# this is a hack around DOS perl 4 "bug"
if($testtext =~ /([\W\n])&&$/m) {
if ($1 ne "\n") { print "\n"}
$wordsout = 0;
}
else {
print " "; # otherwise, add a blank
$wordsout++;
if($wordsout >= $MAXPERLINE) {
print "\n"; $wordsout = 0;
}
}
$numword = ($numword < $#textwords)? $numword + 1 : 0;
last;
}
else {
# pattern not found. bump the counter and test to see
# if we've been all the way through the text.
$wordschecked++;
if ($wordschecked > $#textwords +1) {
print "[pattern not found: $patterns[$numpat], skipping]\n";
last;
}
$numword = ($numword < $#textwords)? $numword + 1 : 0;
}
}
$numpat = ($numpat < $#patterns)? $numpat + 1 : 0;
}
print "\n";
exit (0);