Zuletzt geändert: Sa, 07.10.2006

«K12/K13» entropy.pl «PDF», «POD»



Download
#!/usr/bin/perl

use warnings;
use strict;
use utf8;

use List::Util     qw< sum >;
use Compress::Zlib;
use Gtk2 -init;
use Gtk2::SimpleList;

# Top-level application window; closing it ends the Gtk main loop and
# thereby the program.
my $window = Gtk2::Window->new;
$window->set_title("Entropie");
$window->signal_connect(delete_event => sub { Gtk2->main_quit });

# Vertical container that receives every frame of the UI
# (arguments: not homogeneous, 5 px spacing between children).
my $vbox = Gtk2::VBox->new(0,5);

# Main input field.
# $buf is the text buffer behind the input widget; show_entropy reads the
# text to analyse from it.
my $buf;
{
  my $frame = Gtk2::Frame->new("Eingabe");
  my $sw    = Gtk2::ScrolledWindow->new;
  $sw->set_policy("automatic", "automatic");

  my $entry = Gtk2::TextView->new;
  $entry->set_wrap_mode("word");
  $buf      = $entry->get_buffer;

  # Debounce: recompute the statistics 50 ms after the most recent edit
  # instead of on every single keystroke.
  $buf->signal_connect(changed => sub { once(50 => \&show_entropy) });

  if(@ARGV) {
    # Pre-fill the input with the contents of ALL files named on the command
    # line.  In slurp mode <> yields one element per file, so the pieces are
    # joined; an unreadable or empty input gives "" rather than undef.
    my $text = do { local $/; join '', <> };

    # The files are read as raw bytes.  Decode them as UTF-8 so that the
    # widget shows the right characters and the entropy is computed per
    # character, as in the GUI.  If the data is not valid UTF-8,
    # utf8::decode leaves it untouched and it is treated as Latin-1.
    utf8::decode($text);

    $buf->insert_at_cursor($text);

    # "|| 0": the entropy of an empty input is reported as 0.
    print "Entropie von ARGV: " . ((calc_entropy($text))[3] || 0) . "\n";
  }

  $entry->set_size_request(500, 100);
  $sw->add($entry);
  $frame->add($sw);
  $vbox->pack_start($frame, 0, 0, 0);
}

{
  # IDs of timeouts that are scheduled but have not fired yet, keyed by the
  # (stringified) code reference they will run.
  my %timeouts;

  # once($delay, $code)
  #
  # Schedule $code to run once, $delay milliseconds from now.  If a run of
  # the same $code is still pending, it is cancelled first, so a burst of
  # calls results in a single execution after the last call.
  #
  # Returns the ID of the newly created timeout source.
  sub once {
    my ($delay, $code) = @_;

    # Cancel a still-pending run, if any.  Entries are removed when their
    # timer fires (see below), so an ID found here always refers to a live
    # source and is never a stale, already-destroyed one.
    if(my $pending = delete $timeouts{$code}) {
      Glib::Source->remove($pending);
    }

    $timeouts{$code} = Glib::Timeout->add($delay => sub {
      # Returning false makes Glib destroy this source, so forget its ID
      # before running the code (which may itself call once() again).
      delete $timeouts{$code};
      $code->();
      return 0;
    });
  }
}

# Widget handles shared between the setup blocks and show_entropy.
# They are declared here and assigned further down in the file.
my (
  $normalize,          # "Normalisierung" check button
  $textlen_label,      # value label: text length
  $entropy_label,      # value label: entropy of the text
  # value labels for the gzip statistics: compressed size, size ratio, entropy
  $gzip_total_label, $gzip_ratio_label, $gzip_entropy_label,
);
{
  # Frame with a scrollable table holding one row per distinct character.
  my $frame = Gtk2::Frame->new("Informationsgehalt pro Zeichen");
  my $sw    = Gtk2::ScrolledWindow->new;
  $sw->set_policy("automatic", "automatic");

  # Columns: character, code point, absolute frequency, relative frequency,
  # information content.  The numeric columns other than the absolute
  # frequency are "text" because show_entropy fills them with strings
  # already formatted by sprintf.
  my $list = Gtk2::SimpleList->new(
    "Zeichen"         => "text",
    "Unicode"         => "text",
    "Abs. Häufigkeit" => "int",
    "Rel. Häufigkeit" => "text",
    "Informationsgehalt in bit" => "text",
  );

  $list->set_size_request(500, 200);
  $sw->add($list);

  $frame->add($sw);
  $vbox->pack_start($frame, 0, 0, 0);

  # show_entropy()
  #
  # Recompute every statistic from the current contents of the input buffer
  # and refresh the summary labels and the per-character table.
  #
  # Takes no arguments (anything a signal emitter passes is ignored) and
  # returns nothing; it only updates widgets.
  sub show_entropy {
    my $text = $buf->get_text($buf->get_start_iter => $buf->get_end_iter, 0);

    # Normalization: fold to lower case and drop all whitespace.
    if($normalize->get_active) {
      $text = lc $text;
      $text =~ s/\s//g;
    }

    my ($abscount, $prob, $information, $entropy) = calc_entropy($text);

    # "|| 0": calc_entropy may yield an undefined entropy for empty text.
    $entropy_label->set_label(sprintf '%0.5f bit', $entropy || 0);

    # gzip works on bytes, and memGzip croaks ("Wide character") when handed
    # a string containing code points above 0xFF.  Compress the UTF-8
    # encoding of the text instead, and use that same byte count for the
    # figures labelled "B" so that the ratio compares bytes with bytes.
    # (Assumes the buffer returns a character string - TODO confirm.)
    my $bytes = $text;
    utf8::encode($bytes);
    my $text_bytes = length $bytes;

    my $gzip       = Compress::Zlib::memGzip($bytes);
    my $gzip_bytes = length $gzip;

    $gzip_total_label->set_label(sprintf '%d B', $gzip_bytes);
    $gzip_ratio_label->set_label(
      # Empty text: the ratio is undefined (division by zero), shown as "∞".
      $text_bytes ? sprintf('%0.5f', $gzip_bytes / $text_bytes) : "∞"
    );
    $gzip_entropy_label->set_label(sprintf '%0.5f bit', (calc_entropy($gzip))[3]);

    $textlen_label->set_label(sprintf '%d B', $text_bytes);

    # One table row per distinct character, rarest (highest information
    # content) first; ties are broken by the character itself.
    @{ $list->{data} } = map {[
      $_,
      sprintf('U+%04X', ord $_),    # code points are conventionally hex
      $abscount->{$_},
      sprintf('%0.5f', $prob->{$_}),
      sprintf('%0.5f', $information->{$_}),
    ]} sort {
      $information->{$b} <=> $information->{$a} ||
                      $a cmp $b
    } keys %$abscount;

    return;
  }

  # calc_entropy($text)
  #
  # Compute per-character statistics and the entropy of $text.  The text is
  # split into characters as Perl sees them (bytes for a byte string).
  #
  # Returns a four-element list:
  #   \%abscount    - character => number of occurrences
  #   \%prob        - character => relative frequency (count / text length)
  #   \%information - character => information content in bit, log2(1/p)
  #   $entropy      - sum over all characters of p * information, in bit;
  #                   0 for an empty (or undefined) text
  sub calc_entropy {
    my $text = shift;
    $text = '' unless defined $text;

    my $total = length $text;
    my $log2  = log 2;

    my %abscount;
    $abscount{$_}++ for split //, $text;

    my %prob;
    $prob{$_} = $abscount{$_} / $total for keys %abscount;

    # log2(total/count) rather than -log2(p): for a text consisting of a
    # single repeated character this gives 0 instead of negative zero, which
    # would be displayed as "-0.00000".
    my %information;
    $information{$_} = log($total / $abscount{$_}) / $log2 for keys %abscount;

    # The leading 0 makes the sum defined (0) when there are no characters.
    my $entropy = sum(0, map { $prob{$_} * $information{$_} } keys %prob);

    return \%abscount, \%prob, \%information, $entropy;
  }
}

# Summary statistics frame: one line per figure, with the description on the
# left and a value label on the right.  Each value label is stored into the
# file-level variable named in the table so show_entropy can update it.
{
  my @rows = (
    [ "Textlänge"                           => \$textlen_label ],
    [ "Entropie"                            => \$entropy_label ],
    [ "Komprimiert mit gzip, Länge"         => \$gzip_total_label ],
    [ "Komprimiert mit gzip, auf Textlänge" => \$gzip_ratio_label ],
    [ "Komprimiert mit gzip, Entropie"      => \$gzip_entropy_label ],
  );

  my $stats_frame = Gtk2::Frame->new("Gesamtstatistik");
  my $stats_box   = Gtk2::VBox->new;

  foreach my $row (@rows) {
    my ($caption, $value_ref) = @{$row};

    my $line          = Gtk2::HBox->new;
    my $caption_label = Gtk2::Label->new($caption . ":");
    my $value_label   = Gtk2::Label->new;

    # Publish the value label through the reference from the table.
    ${$value_ref} = $value_label;

    $caption_label->set_alignment(0, 0);
    $value_label->set_alignment(1, 0);

    $line->add($_) for $caption_label, $value_label;
    $stats_box->add($line);
  }

  $stats_frame->add($stats_box);
  $vbox->pack_start($stats_frame, 1, 1, 0);
}

# Normalization check box: switched on initially; toggling it recomputes
# all statistics right away.
{
  $normalize = Gtk2::CheckButton->new_with_label("Normalisierung");
  $normalize->set_active(1);
  $normalize->signal_connect(toggled => \&show_entropy);

  my $options_frame = Gtk2::Frame->new("Optionen");
  $options_frame->add($normalize);

  $vbox->pack_start($options_frame, 1, 1, 0);
}

# Main loop
# Fill in the statistics once for the initial buffer contents, so the
# labels and the table are populated before the window appears.
show_entropy();

# Put the assembled UI into the window, show it, and hand control to Gtk.
# Gtk2->main() returns only after Gtk2->main_quit has been called.
$window->add($vbox);
$window->show_all();
Gtk2->main();