Pulling Stats from Toastmasters International Educational Archive
In case this helps any of the data gronkers out there:
Pulling TM educational stats together in Perl:
#!/usr/bin/perl
use strict;
use warnings;
use Cache::FileCache;
use LWP::Simple;
# On-disk cache for fetched report pages, so repeated runs do not hit the
# server again.  Entries expire after 60*60*24 seconds (one day).
# Uses the arrow method call rather than indirect object syntax
# ("new Cache::FileCache ..."), whose parse is ambiguous.
my $cache = Cache::FileCache->new(
    { 'default_expires_in' => 60*60*24 }
);
# District numbers whose reports are pulled: 1 through 87 with no
# district 48, plus the special district number 99.
my @districts = (
    1 .. 47,
    49 .. 87,
    99,
);
# sprintf template for a district's report page; %d receives the district number.
my $urlformat = 'http://reports.toastmasters.org/historical/dprReports.cfm?r=3&d=%d';
# Maps each award code to its track: 'Communication' or 'Leadership'.
my %awardtype = (
    ( map { ( $_ => 'Communication' ) } qw(
        ACB ACG ACGC ACS CC CTM OATM PACB PACG PACS PCC
    ) ),
    ( map { ( $_ => 'Leadership' ) } qw(
        ALB ALS AS CL DTM LDREXC PALB PALS PCL PDTM PLE
    ) ),
);
# Maps each award code to its level: 'Basic' or 'Advanced'.
my %awardlevel = (
    ( map { ( $_ => 'Basic' ) } qw(
        CC CL CTM PCC PCL
    ) ),
    ( map { ( $_ => 'Advanced' ) } qw(
        ACB ACG ACGC ACS ALB ALS AS DTM LDREXC OATM
        PACB PACG PACS PALB PALS PDTM PLE
    ) ),
);
# Scrape each district's report page and write one tab-separated row per
# table row found to foo.tsv.  The output columns are the district number,
# the eight table cells in page order, then the award type and award level
# looked up from the Award cell.
my $outfile = 'foo.tsv';
open(my $out, '>', $outfile) or die("couldn't open $outfile: $!");
print {$out} join("\t",
    qw(District Club Division Area Award Date Member Name Location),
    'Award Type', 'Award Level',
), "\n";

# One table cell of a report row; captures the cell text with the
# surrounding whitespace trimmed.
my $cell_re = qr{<td \s+ class="content" \s+ valign="top"> \s* ([^<]+?) \s* </td>}x;

# A report row is a <tr> with an e/f-only bgcolor holding exactly eight such
# cells, so the captures are $1 .. $8 in column order.
my $cells  = join('\s*', ($cell_re) x 8);
my $row_re = qr{ \s* <tr \s+ bgcolor="[ef]{6}"> \s* $cells \s* </tr> }xs;

for my $d (@districts) {
    my $url = sprintf($urlformat, $d);

    # Use the cached page when there is one; otherwise fetch and cache it.
    my $content = $cache->get($url);
    if (!$content) {
        $content = get($url);

        # LWP::Simple::get returns undef on failure.  Skip the district
        # instead of caching the failure and matching against undef.
        if (!defined $content) {
            warn("couldn't fetch $url; skipping district $d\n");
            next;
        }
        $cache->set($url, $content);
    }

    while ($content =~ /$row_re/g) {
        # Copy the captures immediately, before anything can clobber them.
        my @fields = ($1, $2, $3, $4, $5, $6, $7, $8);
        my $award  = $fields[3];

        # Award codes missing from the lookup tables yield empty columns
        # rather than "uninitialized value" warnings.
        my $type  = exists $awardtype{$award}  ? $awardtype{$award}  : '';
        my $level = exists $awardlevel{$award} ? $awardlevel{$award} : '';

        print {$out} join("\t", $d, @fields, $type, $level), "\n";
    }
}

# Buffered write errors only surface at close, so check it.
close($out) or die("couldn't close $outfile: $!");