#! /usr/bin/perl -w
# srom: Sucks-Rules-O-Meter
# Copyright 1998 Electric Lichen L.L.C.
# Don Marti
# 29 October 2001 -- added (Mac) OS X
# 20 February 2001 -- switched to Raging Search
# revised 15 January 2000 -- added OpenBSD.
# revised 9 July 2000 -- added logging functionality
#                        (Johan Walles, d92-jwa @ nada.kth.se)
# revised 3 June 1999 -- new AltaVista result page format
# patched 13 May 1999 -- for editors srom, http://www.tarunz.org/~vassilii/srom/
# revised 19 Mar 1998 -- added $rule_offset

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# Usage: srom [LOGDIR]
#
# For every name in %aliases the script queries the search engine for
# "<alias> <synonym>" phrases, sums the reported hit counts per quality
# ("sucks" / "rules") and prints an HTML bar chart on STDOUT.  Progress
# lines go to STDERR.  When LOGDIR is given, one "<date> <popularity>"
# line per name is appended to a gnuplot-readable log file in LOGDIR.

require 5.006;    # three-argument open() and lexical file/dir handles
use strict;

# require "SimpleGet.pl";
use LWP::Simple;

my $SEARCH_PREFIX = qq{http://altavista.com/web/results?q=%2B%22};
my $SEARCH_SUFFIX = '%22';

# The VOTES_THRESHOLD value determines how much the number of votes
# for a certain OS affects its weighted result.  OSes with exactly
# this many votes will have a weighted score that is exactly in
# between its raw score and the average rating for all operating
# systems.  For more on this, see the discussion of the "true Bayesian
# estimate" below.
my $VOTES_THRESHOLD = 25;

my %aliases = ('vi'    => ['vi', 'vim', 'vi improved'],
               'emacs' => ['emacs', 'xemacs'],
               'pico'  => ['pico'],
              );

my %synonyms = ('sucks' => ['sucks', 'bites'],
                'rules' => ['rules', 'rocks'],
               );

# NOTE(review): the HTML markup of the report did not survive in the
# copy of the script this revision was made from.  The bar colours and
# the table layout produced by print_html_report() are a reconstruction
# around the surviving width/label logic -- confirm against the
# published page before deploying.
my %BAR_COLOR = ('sucks' => '#cc3333',
                 'rules' => '#33cc33',
                );

###########################################################################

warn "Warning: You have not provided any log file directory on the command line. \n"
   . "No logs will be produced.\n"
    unless defined($ARGV[0]);

# $count{NAME}{'sucks'|'rules'} = summed hit count over all aliases/synonyms.
my %count;

# Largest single count seen; the bars are scaled relative to it.  It
# starts at 1 so the percentage division can never be a division by zero.
my $greatest = 1;

foreach my $os (keys(%aliases)) {
    $count{$os} = { 'sucks' => 0, 'rules' => 0 };

    foreach my $alias (@{ $aliases{$os} }) {
        foreach my $quality ('sucks', 'rules') {
            foreach my $synonym (@{ $synonyms{$quality} }) {
                my $hits = fetch_hit_count($alias, $synonym);
                next unless defined $hits;
                $count{$os}{$quality} += $hits + 0;
                print STDERR "$alias $synonym ($os $quality): $hits\n";
            }
            $greatest = $count{$os}{$quality}
                if $count{$os}{$quality} > $greatest;
        }
    }
    print "\n";
}

# $greatest never moved: no query produced a usable count.
die "bad AltaVista, bad, bad " if $greatest == 1;

print_html_report(\%count, $greatest);

# Log the retrieved data for later use by gnuplot, but only when a log
# file directory name has been specified on the command line.
write_logs($ARGV[0], \%count) if defined($ARGV[0]);

exit 0;

###########################################################################

# Query the search engine for the phrase "<alias> <synonym>".
# Returns the reported number of results as a string of digits, or
# nothing (undef in scalar context) when the page could not be fetched
# or did not contain a recognisable result count.
sub fetch_hit_count {
    my ($alias, $synonym) = @_;

    # Aliases such as 'vi improved' contain a space; send it as '+' so
    # the query string stays a valid URL.
    (my $phrase = lc("$alias+$synonym")) =~ tr/ /+/;

    my $uri = $SEARCH_PREFIX . $phrase . $SEARCH_SUFFIX;
    $uri .= '%20%2Beditor' if $alias eq 'vi';   # Avoid the `vi' Roman numeral

    my $page = get($uri);    # LWP::Simple::get returns undef on failure
    unless (defined $page) {
        warn "Warning: No response for $uri\n";
        return;
    }

    return unless $page =~ /found\s+([\d,]+)\D+results/i;

    (my $digits = $1) =~ s/\D//g;               # drop thousands separators
    return $digits;
}

# Build one table cell of a bar: WIDTH is a percentage, COLOR may be
# undef for an uncoloured (offset) cell.
sub bar_cell {
    my ($width, $align, $color, $text) = @_;
    my $bgcolor = defined $color ? qq{ bgcolor="$color"} : '';
    return qq{<td width="$width%" align="$align"$bgcolor>$text</td>};
}

# Print the HTML bar chart on STDOUT.
#   $counts   - hash ref: NAME => { sucks => N, rules => N }
#   $greatest - the count that corresponds to a full-width bar
# Each row has a right-aligned "sucks" bar and a left-aligned "rules"
# bar.  A count is printed inside its bar when the bar is wide enough
# and next to the bar otherwise.
sub print_html_report {
    my ($counts, $greatest) = @_;
    my $filler = '&nbsp;';

    # print comment for easy conversion
    print "<!-- srom -->";
    print qq{<table border="0" cellspacing="2" cellpadding="0" width="100%">};
    print qq{<tr><th></th><th>sucks</th><th>rules</th></tr>\n};

    foreach my $os (sort(keys(%$counts))) {
        my $sucks = $counts->{$os}{'sucks'};
        my $rules = $counts->{$os}{'rules'};

        my $suckage     = int(100 * $sucks / $greatest);
        my $suck_offset = 100 - $suckage;
        my $ruleage     = int(100 * $rules / $greatest);
        my $rule_offset = 100 - $ruleage;

        # "sucks" bar: empty offset cell on the left, bar on the right.
        my $suck_outside = $suck_offset >= 75 ? $sucks + 0 : $filler;
        my $suck_inside;
        if ($suckage > 25) {
            $suck_inside = $sucks;
        }
        elsif ($suckage == 0) {
            $suck_inside = '';
        }
        else {
            $suck_inside = $filler;
        }

        # "rules" bar: bar on the left, empty offset cell on the right.
        my $rule_inside;
        if ($ruleage >= 25) {
            $rule_inside = $rules;
        }
        elsif ($ruleage == 0) {
            $rule_inside = '';
        }
        else {
            $rule_inside = $filler;
        }
        my $rule_outside = $ruleage < 25 ? $rules : $filler;

        print qq{<tr>\n};
        print qq{<th align="right">$os</th>};

        print qq{<td width="45%">};
        print qq{<table border="0" cellspacing="0" cellpadding="0" width="100%"><tr>\n};
        print bar_cell($suck_offset, 'right', undef,               $suck_outside);
        print bar_cell($suckage,     'right', $BAR_COLOR{'sucks'}, $suck_inside);
        print qq{</tr></table>};
        print qq{</td>\n};

        print qq{<td width="45%">};
        print qq{<table border="0" cellspacing="0" cellpadding="0" width="100%"><tr>\n};
        print bar_cell($ruleage,     'left', $BAR_COLOR{'rules'}, $rule_inside);
        print bar_cell($rule_offset, 'left', undef,               $rule_outside);
        print qq{</tr></table>};
        print qq{</td>};

        print qq{</tr>\n};
    }
    print "</table>\n";

    my $date = scalar(gmtime(time()));
    print qq{<p>\nUpdated $date GMT.\n</p>\n};
    return;
}

# Make sure the log file directory exists and is a directory; create it
# when it is missing.  Dies when that is not possible.
sub ensure_log_directory {
    my ($logdirectory) = @_;

    if (! -e $logdirectory) {
        mkdir($logdirectory, 0777)
            or die "Error: Unable to create log file directory ($!)";
        warn "Warning: New log file directory $logdirectory created.\n";
    }

    die "Error: $logdirectory is not a directory!\n"
        unless (-d $logdirectory);
    return;
}

# Find out which names already have a log file in $logdirectory.
# Every *.gnuplot file must start with a "# NAME" comment line; that
# name identifies whose log it is.  Returns a hash ref NAME => path.
sub find_log_files {
    my ($logdirectory) = @_;

    # Read the directory directly instead of running `ls` through the
    # shell, so directory names with spaces or metacharacters are safe.
    opendir(my $dh, $logdirectory)
        or die "Error: Can't read directory $logdirectory ($!)\n";
    my @names = sort grep { !/^\./ && /\.gnuplot\z/ } readdir($dh);
    closedir($dh);

    my %logfile_for;
    foreach my $name (@names) {
        my $logfile_name = "$logdirectory/$name";

        open(my $fh, '<', $logfile_name)
            or die "Error: Can't open $logfile_name for reading ($!)\n";
        my $os_comment = <$fh>;
        close($fh) or warn "Warning: Couldn't close $logfile_name ($!)";

        # An empty file has no first line; let it fail the format check.
        $os_comment = '' unless defined $os_comment;
        chomp $os_comment;

        my ($os) = $os_comment =~ /^\# (.+)/
            or die "Error: The first line of $logfile_name is not on '# OS-name' format";

        die "Error: $os has more than one log file (at least $logfile_name and $logfile_for{$os})\n"
            if (defined $logfile_for{$os});

        $logfile_for{$os} = $logfile_name;
    }
    return \%logfile_for;
}

# Create a new log file for $os in $logdirectory and return its path.
# The file name is the lower-cased name with every run of characters
# outside [a-z0-9] replaced by "_", plus a numeric suffix when that
# name is already taken.  The file starts with the "# NAME" line that
# find_log_files() looks for.
sub create_log_file {
    my ($logdirectory, $os) = @_;

    my $base = lc $os;
    $base =~ s{[^a-z0-9]+}{_}g;

    if (-e "$logdirectory/$base.gnuplot") {
        my $counter = 1;
        $counter++ while -e "$logdirectory/$base$counter.gnuplot";
        $base .= $counter;
    }

    my $logfile_name = "$logdirectory/$base.gnuplot";
    open(my $fh, '>', $logfile_name)
        or die "Error: Couldn't create file $logfile_name ($!)";
    print {$fh} "# $os\n";
    close($fh) or die "Error: Couldn't create file $logfile_name ($!)";

    warn "Warning: New log file created for $os ($logfile_name)\n";
    return $logfile_name;
}

# True when $logfile_name already contains an entry for $datestring,
# i.e. a line that starts with the date followed by a space.
sub log_has_date {
    my ($logfile_name, $datestring) = @_;
    my $wanted = "$datestring ";

    open(my $fh, '<', $logfile_name)
        or die "Error: Can't open $logfile_name for reading ($!)\n";
    my $found = 0;
    while (my $line = <$fh>) {
        if (index($line, $wanted) == 0) {
            $found = 1;
            last;
        }
    }
    close($fh) or warn "Warning: Couldn't close $logfile_name ($!)";
    return $found;
}

# Append today's popularity percentage of every name to its log file.
#   $logdirectory - directory holding the *.gnuplot log files
#   $counts       - hash ref: NAME => { sucks => N, rules => N }
sub write_logs {
    my ($logdirectory, $counts) = @_;
    my @oses = sort(keys(%$counts));

    ensure_log_directory($logdirectory);

    # Find out what OSes already have log files and create log files
    # for the ones that don't have one already.
    my $logfile_for = find_log_files($logdirectory);
    foreach my $os (@oses) {
        $logfile_for->{$os} = create_log_file($logdirectory, $os)
            unless defined $logfile_for->{$os};
    }

    # Calculate popularity percentages for each operating system.
    # This is done using the "true Bayesian estimate" as described
    # at the bottom of the Internet Movie Database's top 250 list
    # ("http://us.imdb.com/top_250_films").  The idea is that to get
    # very high or very low ratings, you have to have a lot of votes.
    # Operating systems with few votes will be pushed towards the middle
    # of the pack.
    #
    # Here's IMDb's description of the formula used:
    # weighted rank (WR) = (v / (v+m)) x R + (m / (v+m)) x C
    #   where:
    #     R = average for the movie (mean) = (Rating)
    #     v = number of votes for the movie = (votes)
    #     m = minimum votes required to be listed
    #     C = the mean vote across the whole report

    # Calculate the average popularity of all operating systems
    my $total_sucks = 0;
    my $total_rules = 0;
    foreach my $os (@oses) {
        $total_sucks += $counts->{$os}{'sucks'};
        $total_rules += $counts->{$os}{'rules'};
    }
    die "Error: Weird sucks ($total_sucks) and rules ($total_rules) totals"
        if ($total_sucks <= 0 || $total_rules <= 0);
    my $average_popularity = $total_rules / ($total_sucks + $total_rules);

    # Create a datestring understandable by gnuplot: year-month-day in
    # GMT.  Month and day are deliberately not zero-padded so the format
    # matches entries already present in existing log files.
    my ($mday, $mon, $year) = (gmtime(time))[3, 4, 5];
    my $datestring = ($year + 1900) . "-" . ($mon + 1) . "-" . $mday;

    # Log the current date + the popularity percentages to the log files
    foreach my $os (@oses) {
        my $logfile_name = $logfile_for->{$os};

        # Verify that the date is not already present in the log file
        if (log_has_date($logfile_name, $datestring)) {
            warn "Warning: $os log file $logfile_name already has an entry for today. \n"
               . "Not adding one more.\n";
            next;
        }

        # Calculate the popularity for this operating system
        my $sucks = $counts->{$os}{'sucks'};
        my $rules = $counts->{$os}{'rules'};
        my $votes = $sucks + $rules;
        if ($votes <= 0) {
            warn "Warning: Data not available or illegal for $os. Sucks=$sucks, rules=$rules, votes=$votes";
            next;
        }

        my $rating = $rules / ($sucks + $rules);
        my $popularity =
            $rating * ($votes / ($votes + $VOTES_THRESHOLD))
            + $average_popularity * ($VOTES_THRESHOLD / ($votes + $VOTES_THRESHOLD));
        $popularity *= 100;

        die "Error: $os popularity ($popularity) out of bounds"
            if ($popularity < 0 || $popularity > 100.0);

        # Open the operating system's log file for appending
        open(my $fh, '>>', $logfile_name)
            or die "Error: Can't open $logfile_name for appending ($!)";
        print {$fh} "$datestring $popularity\n";
        close($fh) or warn "Warning: Couldn't close log file $logfile_name";
    }
    return;
}