#!/usr/local/bin/perl
#
# Original version by Sam Ruby, written in Python.
# Ported to Perl, and enhanced by Jim Jagielski
# Enhanced to have links to personal pages by Sam Ruby.
# Start making it aware of signed CLAs
#
# Usage: makestats.pl [-c]
#  creates the projects.html (or committers.html, if the -c
#  command line arg is used) which lists the ASF CVS project
#  modules and the committers of those modules. If they are
#  also ASF members, their CVS/SVN id and name will be in bold.
#  This must be run on cvs.apache.org, to read the correct
#  avail and passwd files. Also, the iclas.txt file
#  (from the foundation svn tree) should be located
#  in the same dir as this script.
#
use strict;

# Membership test used to keep the per-module and per-committer
# lists free of duplicates (the original Python version relied on
# the 'in' operator for this). It is a linear scan on every call,
# which is ugly and slow; a much better way would be to also store
# the added entries in a hash and check for the existence of that
# key before adding... maybe later.
#
#   isin($val, @list)
#
# Returns 1 if $val is string-equal (eq) to at least one element
# of @list, and 0 otherwise (including when @list is empty).

sub isin {
  my ($val, @candidates) = @_;
  return (grep { $val eq $_ } @candidates) ? 1 : 0;
}

# "globals"
# $iam is the report flavour ("projects" or "committers") and doubles
# as the basename of the output file; $title is the page title and
# $timestamp the generation time shown in the footer.
my ($iam, $title, $timestamp);
# temp vars
my ($line, @data, @modules, @dummy, @clasnocommit);
# autocreated hashes
my (%fullname, %members, %clas, %website, %module, %tally);
# data construction
my ($id, $name, $row, $rows, $col, $cols);
# printvars
my ($header, $listocvs, $cvss, $a1, $a2, $b1, $b2);

# Pick the report flavour: -c as the first argument selects the single
# committers list, anything else (including no argument at all, hence
# the defined() guard) selects the per-project listing.
if (defined($ARGV[0]) && $ARGV[0] eq "-c") {
  $iam = "committers";
  $title = "Apache Committers";
} else {
  $iam = "projects";
  $title = "Apache Committers by Project Modules";
}

# Output of date(1), trailing newline included.
$timestamp = `date`;

# From here on everything printed to STDOUT lands in the report file.
# Three-argument open keeps the mode separate from the file name, and
# $! tells the operator why the open failed.
open(STDOUT, '>', "${iam}.html") || die "Cannot open ${iam}.html: $!\n";

# Seed %fullname from the system password file: the key is the first
# colon-separated field of each line and the value is whatever
# precedes the first comma in the fifth field.
open(my $passwd_fh, '<', '/etc/passwd') || die "Cannot open passwd file\n";
while (my $entry = <$passwd_fh>) {
  chomp $entry;
  my ($login, $gecos) = (split(/:/, $entry))[0, 4];
  $fullname{$login} = (split(/,/, $gecos))[0];
}
close $passwd_fh;

# Now populate the members hash: the comma-separated user list of the
# "member" group in /etc/group becomes its key set, each id mapping
# to itself. Lines for any other group are ignored.
open(my $group_fh, '<', '/etc/group') || die "Cannot open /etc/group file\n";
while (my $entry = <$group_fh>) {
  # Stripping the "member:*:<gid>:" prefix doubles as the line filter.
  if ($entry =~ s/^member:\*:\d+://) {
    chomp $entry;
    my @ids = split(/,/, $entry);
    %members = ();
    @members{@ids} = @ids;
  }
}
close $group_fh;

# Now populate the clas hash from iclas.txt (colon-separated lines).
#  Simple for now: the first field (the avail ID) is flagged in %clas
#  and the third field, the person's "public" name, is preferred over
#  the passwd entry as their fullname. Entries whose first field is
#  the literal 'notinavail' have no avail ID; their names are queued
#  in @clasnocommit instead.
open(my $clas_fh, '<', './iclas.txt') || die "Cannot open iclas.txt file\n";
while (my $entry = <$clas_fh>) {
  chomp $entry;
  my ($avail_id, $public_name) = (split(/:/, $entry))[0, 2];
  $clas{$avail_id} = "yes";
  if ($avail_id eq 'notinavail') {
    push(@clasnocommit, $public_name);
  } else {
    $fullname{$avail_id} = $public_name;
  }
}
close $clas_fh;

# A fairly liberal regular expression that matches non-relative URLs
my $urlre='[a-zA-Z][\w+\-.]*://[\w;/?:@&=+\$.\-_!~*\'()%,#]+';

# Retrieve the pages that are mined for personal home page links into
# a fresh scratch directory. This is best effort: a failed download
# only costs us some links, so problems are reported on STDERR and the
# run continues. system() in list form avoids going through a shell;
# neither command is expected to write to STDOUT (which by now is the
# report file), wget being run with -q.
system('rm', '-rf', 'wget-workarea') == 0
  or warn "Cannot remove wget-workarea\n";
foreach my $url ('http://www.apache.org/foundation/members.html',
                 'http://jakarta.apache.org/site/whoweare.html',
                 'http://httpd.apache.org/contributors/') {
  system('/usr/local/bin/wget', '-q', '-Pwget-workarea', $url) == 0
    or warn "Cannot retrieve $url\n";
}
my $page;

# The three pages are consumed in a fixed order (Jakarta, httpd,
# members) because later entries overwrite earlier ones in %website.

# Consume Jakarta: every bold link (<b><a href="URL">Name</a></b>)
# maps a name to a home page. Whitespace is collapsed first so that
# the pattern can match across line breaks.
if (open(my $jakarta_fh, '<', 'wget-workarea/whoweare.html')) {
  $page = join('', <$jakarta_fh>);
  close $jakarta_fh;
  $page =~ s/\s+/ /g;
  while ($page =~ m{<b> ?<a href="($urlre)">([\w ]+)</a> ?</b>}msg) {
    $website{$2}=$1;
  }
} else {
  warn "Cannot open wget-workarea/whoweare.html: $!\n";
}

# Consume httpd (index.html is presumably the name wget gives to the
# contributors/ download): a "Name:" line supplies an id and a name,
# and the next "URL:" line supplies the link, which is stored under
# both keys. $name is cleared after every URL line so that a link is
# never attached to a stale entry. The file-level $name is reused
# here rather than redeclared.
if (open(my $httpd_fh, '<', 'wget-workarea/index.html')) {
  $name = undef;
  while (my $html = <$httpd_fh>) {
    ($id,$name)=($1,$2) if $html =~ /Name:.*<a name="(\w+)">(.*?)<\/a>/;
    if ($html =~ /URL:/) {
      $website{$id}=$website{$name}=$1
        if $name and $html =~ m{href="($urlre)"};
      $name = undef;
    }
  }
  close $httpd_fh;
} else {
  warn "Cannot open wget-workarea/index.html: $!\n";
}

# Consume members: same idea as Jakarta, but the links are wrapped in
# <strong> instead of <b>.
if (open(my $members_fh, '<', 'wget-workarea/members.html')) {
  $page = join('', <$members_fh>);
  close $members_fh;
  $page =~ s/\s+/ /g;
  while ($page =~ m{<strong> ?<a href="($urlre)">([\w ]+)</a> ?</strong>}msg) {
    $website{$2}=$1;
  }
} else {
  warn "Cannot open wget-workarea/members.html: $!\n";
}

# Populate the module hash from the CVS avail file; tally stores the
# CVS modules each user is a committer of.
#
# This whole section is switched off by the constant-false guard, so
# nothing below runs. Were it enabled, each "avail|ids|modules" line
# would add every listed id to every listed module that has a
# directory under /home/cvspublic.
if (0) {
  open(AVAIL, '/home/cvs/CVSROOT/avail') || die "Cannot open avail file\n";
  while (defined($line = <AVAIL>)) {
    chomp $line;
    @data = split('\|', $line);
    next unless $data[0] eq 'avail';
    foreach $name (split(',', $data[2])) {
      next unless -d "/home/cvspublic/$name";
      # The committers page pools everybody under one pseudo-module.
      my $bucket = ($iam eq 'projects') ? $name : "cvs/svn-committers";
      foreach $id (split(',', $data[1])) {
        push(@{$module{$bucket}}, $id)
          unless isin($id, @{$module{$bucket}});
        push(@{$tally{$id}}, $name)
          unless isin($name, @{$tally{$id}});
      }
    }
  }
}

# Be SVN aware: read the group definitions ("name=id,id,...") from
# the [groups] section of ./asf-authorization (presumably a
# Subversion authz-style file -- confirm against the real file).
# Every group becomes a module in %module and is added to each of
# its members' lists in %tally; groups whose name contains
# "committers-" followed by at least one more character are skipped.
# For the committers page all ids are pooled under the single
# pseudo-module "cvs/svn-committers" instead.
my $section;
open(my $svnauth_fh, '<', './asf-authorization')
  || die "Cannot open svnauth file: $!\n";
while (my $rule = <$svnauth_fh>) {
  chomp $rule;

  # A commented-out line must neither switch sections nor define a
  # group just because it happens to contain brackets or an '='.
  next if $rule =~ /^\s*#/;

  # Section header: remember where we are, nothing else to do.
  if ($rule =~ /^\s*\[(.*?)\]/) {
    $section = $1;
    next;
  }
  next unless defined($section) && $section eq "groups";

  # "name = ids", tolerating whitespace around either side.
  next unless $rule =~ /^\s*(.*?)\s*=\s*(.*?)\s*$/;
  my ($group, $ids) = ($1, $2);
  next if ($group =~ /committers-./);

  my $bucket = ($iam eq 'projects') ? $group : "cvs/svn-committers";
  foreach my $member (split(/\s*,\s*/, $ids)) {
    next if $member eq '';     # stray comma
    push(@{$module{$bucket}}, $member)
      unless isin($member, @{$module{$bucket}});
    push(@{$tally{$member}}, $group)
      unless isin($group, @{$tally{$member}});
  }
}
close $svnauth_fh;

# Start of the HTML file. The LEFT SIDE NAVIGATION was shamelessly
# stolen from the (old) main www.apache.org static pages.
#
# This emits the document head, the banner, the navigation column and
# the opening of the right-hand table cell that all of the following
# output is written into; that cell, the enclosing table and the
# document itself are only closed by the final print of the script.
# $title is the only value interpolated into this text.

print <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
               "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
 <title> $title </title>
</head>
<body bgcolor="#ffffff" text="#000000" link="#525D76">
 <font color="#000000" size="-1" face="verdana,arial,helvetica,sanserif">
 <table border="0" width="100%" cellspacing="0">
  <tr><td align="left" valign="top">
   <a href="http://www.apache.org/"><img src="http://www.apache.org/images/asf_logo_wide.gif" alt="The Apache Software Foundation" align="left" border="0"/></a>
   </td></tr>
   <tr><td><hr noshade="noshade" size="1"></td></tr>
 </table>
 <h1> $title </h1>
 <table cellspacing="4" width="100%" border="0">
  <tr>
   <td colspan="2">
   <hr size="1" noshade="">
   </td>
  </tr>
  <tr>
  <!-- LEFT SIDE NAVIGATION -->
   <td valign="top" nowrap="nowrap">
   <p><b><a href="/foundation/projects.html">Apache Projects</a></b></p>
   <menu compact="compact">
    <li><a href="http://httpd.apache.org/">HTTP Server</a></li>
    <li><a href="http://apr.apache.org/">APR</a></li>
    <li><a href="http://jakarta.apache.org/">Jakarta</a></li>
    <li><a href="http://perl.apache.org/">Perl</a></li>
    <li><a href="http://tcl.apache.org/">TCL</a></li>
    <li><a href="http://xml.apache.org/">XML</a></li>
    <li><a href="/foundation/conferences.html">Conferences</a></li>
    <li><a href="/foundation/">Foundation</a></li>
   </menu>
   <p><b><a href="/foundation/">Foundation</a></b></p>
   <menu compact="compact">
    <li><a href="/foundation/faq.html">FAQ</a></li>
    <li><a href="/foundation/roles.html">Management</a></li>
    <li><a href="/foundation/news.html">News &amp; Status</a></li>
    <li><a href="/foundation/press/kit/">Press Kit</a></li>
    <li><a href="/foundation/contact.html">Contact</a></li>
   </menu>
   <p><b>Get Involved</b></p>
   <menu compact="compact">
    <li><a href="/foundation/contributing.html">Contributing</a></li>
    <li><a href="/foundation/mailinglists.html">Mailing Lists</a></li>
    <li><a href="/foundation/cvs.html">CVS Repositories</a></li>
   </menu>
   <p><b>Download</b></p>
   <menu compact="compact">
    <li><a href="http://www.apache.org/dyn/closer.cgi">from a mirror</a></li>
    <li><a href="/dist/">from here</a></li>
   </menu>
   <p><b>Sister Projects</b></p>
   <menu compact="compact">
    <li><a href="http://modules.apache.org/">Module Registry</a></li>
    <li><a href="http://www.apache-ssl.org/">Apache-SSL</a></li>
    <li><a href="http://www.modssl.org/">mod_ssl</a></li>
   </menu>
  </td>
  <!-- RIGHT SIDE INFORMATION -->
  <td valign="top" align="left">
   <h2>Project Modules</h2>
EOF

# Produce an index: a table of in-page links to the per-module
# sections, filled column by column so that the sorted module names
# read top to bottom.
@modules = sort keys %module;

# Work out which list element goes into which cell of the index.
#
#   index_layout($count, $cols)
#
# $count is the number of entries and $cols the maximum number of
# columns. Returns one array ref per table row, each holding the list
# indices shown in that row from left to right. The rows number
# ceil($count / $cols) and every index from 0 to $count-1 appears
# exactly once; the list is empty when there is nothing to lay out.
sub index_layout {
  my ($count, $cols) = @_;
  return () if $count < 1 || $cols < 1;

  # Whole number of rows: a fractional value would skew the
  # column-major index arithmetic below.
  my $rows = int(($count + $cols - 1) / $cols);
  my @layout;
  foreach my $r (0 .. $rows - 1) {
    my @cells;
    foreach my $c (0 .. $cols - 1) {
      my $slot = $r + $c * $rows;
      push(@cells, $slot) if $slot < $count;
    }
    push(@layout, \@cells);
  }
  return @layout;
}

# create at most a "table" with 5 columns... May need to reduce that
# (making it longer) to avoid VERY wide pages. For now, we just
# reduce the font size one more tic
$cols = 5;
$rows = int((scalar(@modules) + $cols - 1) / $cols);
print <<EOF;
   <font color="#000000" size="-2" face="verdana,arial,helvetica,sanserif">
   <table border="0">
EOF
foreach my $cells (index_layout(scalar(@modules), $cols)) {
  print "    <tr>\n";
  foreach my $slot (@{$cells}) {
    $name = $modules[$slot];
    print "     <td><a href=\"#$name\">$name</a></td>\n";
  }
  print "    </tr>\n";
}
# The committers page has one more section to link to.
if ($iam ne 'projects') {
  print "     <tr><td><a href=\"#unlistedclas\">Unlisted CLAs</a></td></tr>\n";
}
print <<EOF;
   </table>
   </font>
EOF

# Column headings for the per-module tables. The committers page has
# one more column than the projects page, listing the modules each
# person is a committer of; on the projects page that cell's markup
# ($listocvs) stays empty for every row.
my @header_cells = ("CVS/SVN id", "Name");
if ($iam eq "projects") {
  $listocvs = "";
} else {
  push(@header_cells, "CVS/SVN Projects");
}
$header = "<tr>"
        . join("", map { "<td bgcolor=\"#039acc\">$_</td>" } @header_cells)
        . "</tr>";

# One section per module: a heading carrying the anchor the index
# links to, followed by a table of that module's committers sorted
# by id.
print "   <hr size=\"1\" noshade=\"\">\n";
foreach $name (@modules) {
  print "   <h2 id=\"$name\">$name</h2>\n",
        "   <table border=\"0\" width=\"100%\">\n",
        "    $header\n";

  foreach $id (sort @{$module{$name}}) {
    # ASF members are set in bold ...
    ($b1, $b2) = exists($members{$id})
               ? ("<b class=\"member\">", "</b>")
               : ("", "");

    # ... and anybody without a CLA on file additionally in italics.
    unless (exists($clas{$id})) {
      ($b1, $b2) = ("<i>$b1", "$b2</i>");
    }

    # Link the id to a home page, looked up by id first and by full
    # name second.
    ($a1, $a2) = ("", "");
    if (exists($website{$id})) {
      ($a1, $a2) = ("<a href=\"$website{$id}\">", "</a>");
    } elsif (exists($website{$fullname{$id}})) {
      ($a1, $a2) = ("<a href=\"$website{$fullname{$id}}\">", "</a>");
    }

    # Committers page only: the extra cell naming every module this
    # person belongs to, as " a, b, c".
    if ($iam ne "projects") {
      $listocvs = "<td bgcolor=\"#a0ddf0\">"
                . join(",", map { " $_" } sort @{$tally{$id}})
                . "</td>";
    }

    print "     <tr>\n",
          "      <td bgcolor=\"#a0ddf0\">$b1$a1$id$a2$b2</td>\n",
          "      <td bgcolor=\"#a0ddf0\">$b1$fullname{$id}$b2</td>\n",
          "      $listocvs\n",
          "     </tr>\n";
  }
  print "   </table>\n";
}

# Committers page only: the people who have a CLA on file but no
# commit privileges, laid out four names to a row. The list is
# consumed while printing; a short final row is padded with empty
# cells.
if ($iam ne "projects") {
  print "   <h2 id=\"unlistedclas\">Unlisted CLAs</h2>\n",
        "   <table border=\"0\" width=\"100%\">\n",
        "    <tr><td colspan=4 bgcolor=\"#039acc\">Persons with signed CLAs but are not committers:</td></tr>\n";

  while (my @quad = splice(@clasnocommit, 0, 4)) {
    print "     <tr>\n";
    foreach my $slot (0 .. 3) {
      my $person = defined($quad[$slot]) ? $quad[$slot] : "";
      print "      <td bgcolor=\"#a0ddf0\">$person</td>\n";
    }
    print "     </tr>\n";
  }
  print "   </table>\n";
}

# Closing: end the right-hand cell and the layout table opened by the
# page header, then print the footer with the generation time and the
# legend for the bold/italic markup, and finish the document.
print <<EOF;
  </td></tr>
 </table>
 </font>
 <div align="center">
 <font color="#000000" size="-2" face="verdana,arial,helvetica,sanserif">
 Last updated: $timestamp<br>
 Entries in <i>italics</i> do <b>NOT</b> have a signed <a href="http://incubator.apache.org/forms/ASF_Contributor_License_2_form.pdf">Contributor License Agreement</a> on file (this knowledge is keyed by CVS/SVN id)<br>
 Entries in <b>bold</b> are ASF members.
 </font>
 </div>
</body>
</html>
EOF

# STDOUT is the report file and its output is buffered, so a write
# error (a full disk, say) may only surface when the handle is closed.
# Check for that instead of exiting successfully with a truncated page.
close(STDOUT) || die "Cannot finish writing ${iam}.html: $!\n";

exit;
