#!/usr/local/bin/perl5
##########################################################
#
# markov2.pl
#
# a second version of markov to use the speed up gained in
# travesty2.pl
# by Ron Starr
#
#
# Generates text whose word-sequence frequencies match those of the input text.
# The algorithm is identical to Travesty2.pl, except that
# the units manipulated are "words" rather than individual
# letters.
#
#
# Program does no error checking--you're on your own.
#
#
# Command-line options:
# -g <number> The granularity--the number of words to use to determine
# the next word. Defaults to 3.
# -o <number> The number of words to output. Defaults to 25.
#
#
# Program reads from standard input.
# All output is to standard output.
#
#
# Revision History
# 05/21/00 Version that constructs the table
#
###########################################################
use Getopt::Std;
# Parse the command line; Getopt::Std stores the value of -g in $opt_g and
# the value of -o in $opt_o.
getopts("g:o:");

# Number of words to generate (-o), default 25.  The option is tested with
# "defined" rather than for truth so that an explicit 0 is honoured instead
# of being silently replaced by the default.
$MAXWORDS = defined $opt_o ? $opt_o : 25;

# Granularity (-g): how many preceding words determine the next word,
# default 3.  Tested with "defined" for the same reason, so that "-g 0"
# reaches the range check below and is rejected.
$GRAIN = defined $opt_g ? $opt_g : 3;
if ( $GRAIN < 1 ) { die "granularity must be >= 1\n"; }

# Output line length threshold, in characters: a line is written out once
# it has grown to at least this many characters.
$LETTERS_LINE = 65;
#
# pull in the text, break it into words, put in word array
#
# Append the words of every input line to @textwords.
#
# split with a single-blank string splits on runs of any whitespace and
# ignores leading whitespace, so no manual trimming is needed and the line
# terminator never ends up inside a word.  Unlike the unconditional "chop"
# this replaces, it cannot eat the last letter of a final line that has no
# trailing newline.
while ( my $line = <> ) {
    push @textwords, split " ", $line;
}
#
# generate the frequency table
#
# The seed needs $GRAIN words; with fewer, it would be built from undefined
# array elements and the output would be nothing but blanks.
if ( @textwords < $GRAIN ) {
    die "input must contain at least $GRAIN words\n";
}

# Go through every run of $GRAIN consecutive words that has a successor.
# The run is the key; the value accumulates the successors as a
# blank-separated list.  Duplicates are kept on purpose, so a successor that
# occurs more often in the input is proportionally more likely to be drawn.
#
# The words of a key are joined with a blank.  Words never contain
# whitespace, so two different word sequences can never produce the same
# key (plain concatenation would map both "a","bc" and "ab","c" to "abc").
for my $start ( 0 .. $#textwords - $GRAIN ) {
    my $key_string = join " ", @textwords[ $start .. $start + $GRAIN - 1 ];
    $frequency_table{$key_string} .= $textwords[ $start + $GRAIN ] . " ";
}

# dump the table for debugging
#foreach $key (sort keys %frequency_table) {
# print "$key"," = ",$frequency_table{$key},"\n";
#}
#exit(0);

#
# generate the markov text
#

# Pending output line.
$buffer = "";

# Append one word (plus a trailing blank) to the pending line and write the
# line out once it has reached $LETTERS_LINE characters.  Every emitted
# word, including seed words, goes through here so that all lines are
# wrapped by the same rule.
sub emit_word {
    my ($word) = @_;
    $buffer .= $word . " ";
    if ( length($buffer) >= $LETTERS_LINE ) {
        print $buffer, "\n";
        $buffer = "";
    }
}

# Start with a seed of the first $GRAIN words from the text.  @lastwords
# always holds the $GRAIN most recently emitted words.
@lastwords = @textwords[ 0 .. $GRAIN - 1 ];
foreach my $word (@lastwords) { emit_word($word); }

# Now do the actual generation: $MAXWORDS steps, each emitting either one
# drawn word or, on a dead end, the whole seed again.
for ( my $i = 0; $i < $MAXWORDS; $i++ ) {

    # Same key format as used when building the table.
    my $key_string = join " ", @lastwords;

    if ( exists $frequency_table{$key_string} ) {

        # Draw the next word uniformly from the recorded successors.
        my @possible = split " ", $frequency_table{$key_string};
        my $nextword = $possible[ rand @possible ];
        emit_word($nextword);

        # Slide the window: drop the oldest word, append the new one.
        shift @lastwords;
        push @lastwords, $nextword;
    }
    else {

        # We drew a blank (the current words have no recorded successor,
        # e.g. they are the last words of the input): re-seed with the
        # first $GRAIN words from the text.
        @lastwords = @textwords[ 0 .. $GRAIN - 1 ];
        foreach my $word (@lastwords) { emit_word($word); }
    }
}    # end $i loop

# Write out whatever is left in the pending line.
if ( length($buffer) > 0 ) { print $buffer, "\n"; $buffer = ""; }
exit(0);