#!/usr/bin/perl -w

# assign1a.pl output processor
# (c)2003 Stepan Roh
#
# outputs useful values and averages entropy from assign1a.pl log files
#
# usage: cat file1.log file2.log ... | ./assign1a_proc.pl > output.txt

use strict;
use warnings;

# Pass number that closes an averaging group. It is also the divisor used for
# the averages, so each group is expected to contain this many passes.
my $PASSES_PER_GROUP = 10;

# Minimum number of comma-separated fields a data line must provide: the last
# field this script reads is the entropy, at index 11.
my $MIN_FIELDS = 12;

# Returns $part as a percentage of $whole.
# Yields 0 when $whole is zero so that one degenerate log line (zero words)
# cannot abort the whole report with "Illegal division by zero".
sub _percent {
  my ($part, $whole) = @_;
  return $whole != 0 ? $part / $whole * 100 : 0;
}

# Per-group accumulators; reset whenever a line with pass == 1 is seen.
my ($sum_freq_1_word, $sum_words, $sum_ent) = (0, 0, 0);
my ($min_ent, $max_ent);

# Report header.
printf "%-12s  %-4s  %-6s   %-6s  %-6s  %-6s  %-6s  %-6s \n", 'file', 'mess', 'mess(%)', 'freq1', 'freq1(%)', 'min H', 'avg H', 'max H';

while (my $line = <STDIN>) {
  chomp $line;

  # Blank lines (e.g. at the seams of concatenated logs) carry no data. If
  # they were processed, their undefined fields would be treated as 0 and
  # could overwrite the running minimum entropy of the current group.
  next unless $line =~ /\S/;

  # Field order: file, mess, mess_pc, pass, words, chars, chars_per_word,
  # distinct, freq_1_word, most_freq_word, most_freq_word_count, entropy,
  # perplexity. Only the fields used below are unpacked.
  my @fields = split /\s*,\s*/, $line;

  # data header
  next if @fields && $fields[0] eq 'file';

  # Truncated line: report it on STDERR and keep going instead of computing
  # with undefined values.
  if (@fields < $MIN_FIELDS) {
    warn "line $.: expected at least $MIN_FIELDS fields, got " . scalar(@fields) . "; skipped\n";
    next;
  }

  my ($file, $mess, $mess_pc, $pass, $words) = @fields[0 .. 4];
  my ($freq_1_word, $entropy) = @fields[8, 11];

  # original data line: printed as-is, with no min/max entropy
  if ($mess eq '-') {
    printf "%-12s  %-4s  %-6s   %6d  %6.2f%%    %-6s  %6.4f  %-6s\n", $file, '-', '-', $freq_1_word, _percent($freq_1_word, $words), '-', $entropy, '-';
    next;
  }

  # averaging: the first pass of a group restarts the accumulators
  if ($pass == 1) {
    ($sum_freq_1_word, $sum_words, $sum_ent) = (0, 0, 0);
    $min_ent = $max_ent = $entropy;
  }

  $sum_freq_1_word += $freq_1_word;
  $sum_words += $words;
  $sum_ent += $entropy;
  # The defined-checks keep min/max sane if a group does not start at pass 1.
  $min_ent = $entropy if !defined $min_ent || $entropy < $min_ent;
  $max_ent = $entropy if !defined $max_ent || $entropy > $max_ent;

  # The last pass of a group emits one averaged row for that group.
  if ($pass == $PASSES_PER_GROUP) {
    my $avg_freq_1_word = $sum_freq_1_word / $PASSES_PER_GROUP;
    my $avg_words = $sum_words / $PASSES_PER_GROUP;
    my $avg_ent = $sum_ent / $PASSES_PER_GROUP;
    printf "%-12s  %-4s  %6.3f%%  %6d  %6.2f%%    %6.4f  %6.4f  %6.4f\n", $file, $mess, $mess_pc, $avg_freq_1_word, _percent($avg_freq_1_word, $avg_words), $min_ent, $avg_ent, $max_ent;
  }
}

1;
