# ARTICLES - Reads wwwstat output for BYTE archive, reports frequency
# of use of articles by issue (e.g. Feb 1995) or section (e.g. News and Views)
#
# Expects one or more files matching 9?????.htm containing wwwstat output.
# Expects all.at containing mapping between URLs and article titles
# (one record per line: url~issue~section~title).
#
# Jon Udell, BYTE, jon_u@dev5.byte.com,judelL@bix.com
#
# Arguments:
#   1. NAME of a wwwstat file (with or without the .htm suffix), or "all"
#      to process every file matching 9?????.htm
#   2. global threshold: e.g. 1000 for entire history of site, 100 for 1 day
#   3. view threshold:   e.g. 100 for entire history of site, 10 for 1 day
#
# Output:
#   NAME.at1  log/article hit totals only
#   NAME.at2  global summary, totals, then the bytemarks, issue and
#             section views
#
# partial results for Jan 15, 1996:
#
# global view
#
# 000910  January 1996   Cover Story          The World's Fastest Computers
# 000136  September 1995 20th Anniversary     Top 20 Small Systems
# 000111  September 1995 20th Anniversary     20 Most Important Chips
#
# 960115: issue view
#
# 000910  January 1996   Cover Story          The World's Fastest Computers
# 000051  January 1996   The Byte Network Pro Server Management
# 000038  January 1996   Pournelle            Dead Chickens and Portable Data
# 000033  January 1996   Editorial            Web Sites: Don't Blink
# 000019  January 1996   International News & Client/Server: Europe Trails U.S.
# 000016  January 1996   International Featur A Consultant in Your PC
# 000013  January 1996   International What's Video and TV Board for PCI Systems
# 000011  January 1996   International What's HotMetal Pro Creates HTML Documents
#
# 960115: section view
#
# 000051  January 1996   The Byte Network Pro Server Management
# 000033  December 1995  The Byte Network Pro Perl Magic
# 000014  September 1995 The Byte Network Pro Web Search
# 000014  August 1995    The Byte Network Pro Live Wire

use strict;
use warnings;

my @MONTHS = qw(
    January February March     April   May      June
    July    August   September October November December
);

# Issue views in report order (newest first): January 1996, then every
# month of 1995 and 1994 from December back to January.  Extend this list
# when newer issues are added to the archive.
my @ISSUE_LABELS = ('January 1996');
foreach my $year (1995, 1994) {
    push @ISSUE_LABELS, map { $_ . ' ' . $year } reverse @MONTHS;
}

# Section views in report order.  Each rule is
#   [ pattern the record must match, optional pattern it must NOT match ].
# Patterns are tested against the whole "hits~issue~section~title" record,
# so a section name appearing in a title also selects the record.
my @SECTION_RULES = (
    [ qr/Editorial/ ],
    [ qr/Letters/ ],
    [ qr/News & Views/ ],
    [ qr/Blasts/ ],
    [ qr/Book & CD/ ],
    [ qr/BYTE Awards/ ],
    [ qr/Solutions Focus/ ],
    [ qr/Features/ ],
    [ qr/Byte Network Project/ ],
    [ qr/State Of The Art/ ],
    [ qr/Special Report/ ],
    [ qr/20th Anniversary/ ],
    [ qr/Cover Story/ ],
    [ qr/Reviews/, qr/Book/ ],    # plain Reviews only, not "Book & CD Reviews"
    [ qr/BYTE Lab/ ],
    [ qr/Core Technologies/ ],
    [ qr/Pournelle/ ],
    [ qr/What\'s New/ ],
    [ qr/Commentary/ ],
);

my $RECORD_FORMAT = "%-8.8s%-15.15s%-20.20s %-38.38s\n";

@ARGV or die "Usage: $0 NAME|all [GLOBMIN [VIEWMIN]]\n";

(my $arg = $ARGV[0]) =~ s/\.htm//;

# A missing threshold behaves as 0, i.e. every item with at least one hit.
my $globmin = defined $ARGV[1] ? $ARGV[1] : 0;
my $viewmin = defined $ARGV[2] ? $ARGV[2] : 0;

open(my $lst_fh, '<', 'all.at')   or die "Cannot open all.at: $!";
open(my $log_fh, '>', "$arg.at1") or die "Cannot create $arg.at1: $!";
open(my $sum_fh, '>', "$arg.at2") or die "Cannot create $arg.at2: $!";

my $log_pattern = $arg eq 'all' ? '9?????.htm' : "$arg.htm";

# build a url array, and two related associative arrays keyed on url
#   1. titles  ("issue~section~title")
#   2. counts
my (@urls, %title_for, %count_for);
while (my $line = <$lst_fh>) {
    $line =~ s/\r?\n\z//;    # strip only a line ending, never a real character
    my ($url, $issue, $section, $title) = split /~/, $line, 4;
    next unless defined $url && length $url;
    foreach my $field ($issue, $section, $title) {
        $field = '' unless defined $field;
    }

    # Register each URL once so a repeated line cannot double its hits.
    push @urls, $url unless exists $title_for{$url};
    $title_for{$url} = join '~', $issue, $section, $title;
    $count_for{$url} = 0 unless exists $count_for{$url};
}
close $lst_fh;

# process wwwstat output
my $loghits = 0;
foreach my $file (glob $log_pattern) {
    print $file . ' ';    # progress indicator on STDOUT
    open(my $in_fh, '<', $file) or die "Cannot open $file: $!";
    while (my $line = <$in_fh>) {
        $line =~ tr/A-Z/a-z/;

        # limit to URLs that are really articles, no table of contents
        # pages or gifs
        next unless $line =~ m{/art/(?:[0-9]+|special|bonus)/|/bmark/};

        # Presumably each line is "stats columns | url" -- lines without
        # the " | " separator are skipped.
        $line =~ s/^ +//;
        my ($stats, $url) = split / \| /, $line, 2;
        next unless defined $url;
        $url =~ s/\s+\z//;

        # The request count is the fourth space-separated stats column.
        my $reqs = (split / +/, $stats, 4)[3];
        next unless defined $reqs && $reqs =~ /^(\d+)/;
        $reqs = $1;

        # Benchmark area: only the bytecpu.exe download counts.
        # Elsewhere: skip images, numbered pages and section indexes.
        my $is_bmark  = $line =~ m{/bmark/};
        my $countable = $is_bmark
            ? $line =~ /bytecpu\.exe/
            : (    $line !~ /gif/
                && $line !~ m{/[0-9]+\.htm}
                && $line !~ m{/sec[0-9]+\.htm} );
        next unless $countable;

        $count_for{$url} += $reqs;
        $loghits         += $reqs;    # includes URLs not listed in all.at
    }
    close $in_fh;
}

# Condense duplicate titles and add up subtotals.  Summing into a hash
# keyed on the title guarantees every title -- including the last one in
# sort order -- ends up in @totals exactly once.
my %hits_for_title;
foreach my $url (@urls) {
    $hits_for_title{ $title_for{$url} } += $count_for{$url};
}
my @totals = map { sprintf('%06d', $hits_for_title{$_}) . '~' . $_ }
    keys %hits_for_title;

# break out totals by issue and section
my $arthits = 0;
my (@bytemarks, %issue_view, @section_view);

# Zero-padded hit counts make a plain string sort order by hits
# (valid while counts stay within six digits).
foreach my $total (reverse sort @totals) {
    my ($hits, $issue, $section, $title) = split /~/, $total, 4;

    # section views
    foreach my $i (0 .. $#SECTION_RULES) {
        my ($must, $must_not) = @{ $SECTION_RULES[$i] };
        next unless $total =~ $must;
        next if defined $must_not && $total =~ $must_not;
        push @{ $section_view[$i] }, $total;
    }

    # issue views
    foreach my $label (@ISSUE_LABELS) {
        push @{ $issue_view{$label} }, $total if index($total, $label) >= 0;
    }

    # other views
    push @bytemarks, $total if $total =~ /BYTEmarks/;

    $arthits += $hits;

    # print summary part of report for items above global threshold
    if ($hits > $globmin) {
        printf {$sum_fh} $RECORD_FORMAT, $hits, $issue, $section, $title;
    }
}

foreach my $fh ($sum_fh, $log_fh) {
    print {$fh} "Loghits total: $loghits\n";
    print {$fh} "Arthits total: $arthits\n";
}

# do issue and section parts of report
View('bytemarks', @bytemarks);
View('issue',   @{ $issue_view{$_}   || [] }) foreach @ISSUE_LABELS;
View('section', @{ $section_view[$_] || [] }) foreach 0 .. $#SECTION_RULES;

# Buffered write errors only surface at close, so check it.
close($log_fh) or die "Cannot close $arg.at1: $!";
close($sum_fh) or die "Cannot close $arg.at2: $!";

# View - print one view of the report to the summary file.
#
#   $type  label written as the "<type> view" heading
#   @view  records of the form "hits~issue~section~title"
#
# The heading is always written, even for an empty view.  Records are
# listed highest hit count first; only those whose hits exceed the view
# threshold ($viewmin) are printed.
sub View {
    my ($type, @view) = @_;

    print {$sum_fh} "$type view\n";

    foreach my $total (reverse sort @view) {
        my ($hits, $issue, $section, $title) = split /~/, $total, 4;
        foreach my $field ($issue, $section, $title) {
            $field = '' unless defined $field;
        }
        next unless $hits > $viewmin;
        printf {$sum_fh} $RECORD_FORMAT, $hits, $issue, $section, $title;
    }
    return;
}