From: Vincent Pit Date: Sat, 11 Dec 2010 23:57:07 +0000 (+0100) Subject: An even smarter gengentooisms script X-Git-Tag: v0.11~13 X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FCPANPLUS-Dist-Gentoo.git;a=commitdiff_plain;h=81b5731129ec4df7c299707f9650bf4a693a8154 An even smarter gengentooisms script --- diff --git a/.gitignore b/.gitignore index 4b44dd6..4068730 100644 --- a/.gitignore +++ b/.gitignore @@ -24,5 +24,6 @@ cover_db Debian_CPANTS.txt -gentooisms.sto +gentooisms.data.sto +gentooisms.state.sto lib/CPANPLUS/Dist/Gentoo/Maps.pm.bak diff --git a/lib/CPANPLUS/Dist/Gentoo/Maps.pm b/lib/CPANPLUS/Dist/Gentoo/Maps.pm index 0cb1af2..b9b9611 100644 --- a/lib/CPANPLUS/Dist/Gentoo/Maps.pm +++ b/lib/CPANPLUS/Dist/Gentoo/Maps.pm @@ -197,6 +197,7 @@ $version_mismatch{$_} = $insert_dot_at_all_2 for qw/ Curses-UI DBD-mysql Email-MessageID + Exception-Base ExtUtils-CBuilder ExtUtils-ParseXS FileHandle-Unget @@ -363,6 +364,7 @@ Locale-Maketext-Lexicon locale-maketext-lexicon Log-Dispatch log-dispatch Math-Pari math-pari Module-Info module-info +MogileFS-Server mogilefs-server NTLM Authen-NTLM Net-Ping net-ping Net-SFTP net-sftp diff --git a/samples/gengentooisms b/samples/gengentooisms index 74a46e9..e5f8f13 100755 --- a/samples/gengentooisms +++ b/samples/gengentooisms @@ -11,6 +11,18 @@ use Storable (); use Term::ANSIColor; use CPAN::DistnameInfo; +BEGIN { + my $old_cdi_new = \&CPAN::DistnameInfo::new; + die 'CPAN::DistnameInfo is not loaded' unless $old_cdi_new; + my $new_cdi_new = sub { + my $dist = $old_cdi_new->(@_); + $dist->{version} =~ s/-withoutworldwriteables$//; + $dist; + }; + no warnings 'redefine'; + *CPAN::DistnameInfo::new = $new_cdi_new; +} + use Capture::Tiny qw/capture/; use LWP::UserAgent; use Parse::CPAN::Packages::Fast; @@ -26,20 +38,13 @@ use constant CPAN_MIRROR => 'http://www.cpan.org/'; use constant PORTAGE => '/usr/portage'; use constant TARGET => 'lib/CPANPLUS/Dist/Gentoo/Maps.pm'; use constant BACKUP => TARGET . '.bak'; -use constant STATE_FILE => 'gentooisms.sto'; +use constant DATA_FILE => 'gentooisms.data.sto'; +use constant STATE_FILE => 'gentooisms.state.sto'; -my %gentooism; - -my %is_on_cpan = map { $_ => 1 } qw/ - Business-FedEx-DirectConnect - CDDB_get - CursesWidgets - SpeedyCGI - WattsUp-Daemon - frontier-rpc - gnome2-perl - gnome2-wnck -/; +my %is_on_cpan = ( + 'Audio-CD-disc-cover' => 0, + 'WattsUp-Daemon' => 1, +); sub p { my ($indent, $fmt, @args) = @_; @@ -47,14 +52,39 @@ sub p { printf $fmt, @args; } -my (@not_on_cpan, %name_mismatch, %version); +sub timestamp { + my $tm = File::Spec->catfile(PORTAGE, 'metadata', 'timestamp.chk'); + return unless -e $tm; + open my $fh, '<', $tm; + local $/; + <$fh>; +} + +my $timestamp = timestamp(); + +{ + my $ua; + + sub cpan_http_test { + my ($path) = @_; + + unless (defined $ua) { + $ua = LWP::UserAgent->new; + $ua->agent('CPANPLUS::Dist::Gentoo gentooisms generator/1.0'); + } + + my $r = $ua->head(CPAN_MIRROR . $path); + + return $r && $r->code == 200; + } +} + +my %fetched_uri; +my (@not_on_cpan, @unfindable, @missing, %name_mismatch, %version); sub parse_portage_tree { my $pcp = Parse::CPAN::Packages::Fast->new(PACKAGES); - my $ua = LWP::UserAgent->new; - $ua->agent('CPANPLUS::Dist::Gentoo gentooisms generator/1.0'); - for my $category (qw/perl-core dev-perl/) { p(0, "Browsing the $category category.\n"); @@ -65,34 +95,54 @@ sub parse_portage_tree { my $pkg_name = (File::Spec->splitdir($pkg_dir))[-1]; + if (exists $is_on_cpan{$pkg_name} and not $is_on_cpan{$pkg_name}) { + p(2, colored("$pkg_name is not a CPAN distribution (forced)", 'bright_red') + . "\n"); + push @not_on_cpan, "$category/$pkg_name"; + next; + } + my $last = reduce { $a->[1] > $b->[1] ? $a : $b } map [ $_, CPANPLUS::Dist::Gentoo::Atom->new_from_ebuild($_) ], glob File::Spec->catfile($pkg_dir, "$pkg_name-*"); my ($ebuild, $atom) = @$last; p(1, "%s/%s-%s\n", map $atom->$_, qw/category name version/); - my @cmd = ('ebuild', $ebuild, 'help', '--debug'); - my ($ret, $code); - (undef, my $err) = capture { - $ret = system { $cmd[0] } @cmd; - $code = $?; - }; - if ($ret != 0 or $code == -1 or $code & 127 or $code >> 8) { - die "system(\"@cmd\") returned $ret and/or failed with status $code"; - } + my $uri; + if (exists $fetched_uri{$ebuild}) { + $uri = $fetched_uri{$ebuild}; + } else { + my @cmd = ('ebuild', $ebuild, 'help', '--debug'); + my ($ret, $code); + (undef, my $err) = capture { + $ret = system { $cmd[0] } @cmd; + $code = $?; + }; + if ($ret != 0 or $code == -1 or $code & 127 or $code >> 8) { + die "system(\"@cmd\") returned $ret and/or failed with status $code"; + } - my ($uri, $fqn_dist, $path); - while ($err =~ /SRC_URI=((['"]).*?\2|\S+)/gs) { - $uri = $1; - $uri =~ s{^(['"])(.*?)\1$}{$2}s; + while ($err =~ /SRC_URI=((['"]).*?\2|\S+)/gs) { + $uri = $1; + $uri =~ s{^(['"])(.*?)\1$}{$2}s; + } + $fetched_uri{$ebuild} = $uri; + Storable::store([ + $timestamp, + \%fetched_uri, + ] => DATA_FILE); } + + my ($fqn_dist, $path); if (defined $uri) { if ($uri =~ m{cpan.*?/id/(\S+)}) { $fqn_dist = $1; $path = "authors/id/$fqn_dist"; + $is_on_cpan{$pkg_name} = 1; } elsif ($uri =~ m{mirror://cpan/(\S+)}) { $path = $1; - } elsif ($is_on_cpan{$pkg_name} and $uri =~ m{/([^/\s]+)(?:\s|$)}) { + $is_on_cpan{$pkg_name} = 1; + } elsif ($uri =~ m{/([^/\s]+)(?:\s|$)}) { my $archive = $1; my ($top_level) = $archive =~ /^([^-]+)/; $path = "modules/by-module/$top_level/$archive"; @@ -100,7 +150,7 @@ sub parse_portage_tree { } unless (defined $path) { - p(2, "doesn't seem to be fetching its tarball from a CPAN mirror.\n"); + p(2, "doesn't seem to be fetching its tarball from a CPAN mirror\n"); p(2, colored("$pkg_name is not a CPAN distribution", 'bright_red') . "\n"); push @not_on_cpan, "$category/$pkg_name"; next; @@ -109,23 +159,77 @@ sub parse_portage_tree { my $dist; if (defined $fqn_dist) { - $dist = eval { $pcp->distribution($fqn_dist) }; - p(2, defined $dist ? "is indexed on the CPAN\n" : 'is not indexed, but '); - } else { - p(2, 'is '); + p(2, 'is indexed on the CPAN... '); + $dist = do { + local $@; + eval { $pcp->distribution($fqn_dist) } + }; + print defined $dist ? "yes\n" : "no\n"; } + unless (defined $dist) { - print 'maybe on a CPAN mirror... '; - my $r = $ua->head(CPAN_MIRROR . $path); - if ($r and $r->code == 200) { + p(2, 'can directly be found on a CPAN mirror... '); + if (cpan_http_test($path)) { print "yes\n"; + $dist = CPAN::DistnameInfo->new($path); + } else { + print "no\n"; + } + } + + my ($pseudo_dist, $latest_dist); + + unless (defined $dist) { + p(2, 'has the same name as a distribution on the CPAN... '); + $path =~ m{([^/\s]+)$} or die 'Could not get the last part of the path'; + my $archive = $1; + $pseudo_dist = CPAN::DistnameInfo->new($archive); + $latest_dist = do { + local $@; + eval { $pcp->latest_distribution($pseudo_dist->dist) }; + }; + my ($latest_file, $latest_author); + if (defined $latest_dist) { + $latest_file = $latest_dist->filename; + $latest_author = $latest_dist->cpanid; + printf "yes, %s by %s\n", + $latest_file, + (defined $latest_author ? $latest_author : 'unknown'); } else { print "no\n"; + } + + if (defined $latest_author) { + my ($au, $a) = $latest_author =~ /^((.).)/ or die 'Author name too short'; + p(2, 'is in that author\'s CPAN directory... '); + my $alternate_path = "authors/id/$a/$au/$latest_author/$archive"; + if ($alternate_path eq $path) { + print "already checked\n"; + } elsif (cpan_http_test($alternate_path)) { + $dist = CPAN::DistnameInfo->new($alternate_path); + print "yes\n"; + } else { + print "no\n"; + } + unless (defined $dist) { + push @missing, + "$category/$pkg_name (latest is $latest_file by $latest_author)"; + } + } + } + + unless (defined $dist) { + if ($latest_dist or $is_on_cpan{$pkg_name}) { + $dist = $pseudo_dist; + unless ($latest_dist) { + push @unfindable, "$category/$pkg_name"; + } + p(2, "seems to come from the CPAN anyway\n"); + } else { p(2, colored("$pkg_name is not a CPAN distribution", 'bright_red') . "\n"); push @not_on_cpan, "$category/$pkg_name"; next; } - $dist = CPAN::DistnameInfo->new($path); } my $dist_name = $dist->dist; @@ -150,41 +254,48 @@ sub parse_portage_tree { } } -sub timestamp { - my $tm = File::Spec->catfile(PORTAGE, 'metadata', 'timestamp.chk'); - return unless -e $tm; - open my $fh, '<', $tm; - local $/; - <$fh>; -} - my $already_parsed = 0; -my $timestamp = timestamp(); if (-e STATE_FILE) { - my $data = Storable::retrieve(STATE_FILE); - if ($data->[0] eq $timestamp) { - printf "Data retrieved from %s\n", STATE_FILE; - @not_on_cpan = @{ $data->[1] }; - %name_mismatch = %{ $data->[2] }; - %version = %{ $data->[3] }; + my $state = Storable::retrieve(STATE_FILE); + if ($state->[0] eq $timestamp) { + printf "State retrieved from %s\n", STATE_FILE; + @not_on_cpan = @{ $state->[1] }; + @unfindable = @{ $state->[2] }; + @missing = @{ $state->[3] }; + %name_mismatch = %{ $state->[4] }; + %version = %{ $state->[5] }; $already_parsed = 1; } else { - printf "Obsolete data file %s, regenerating data\n", STATE_FILE; + printf "Obsolete state file %s, regenerating\n", STATE_FILE; 1 while unlink STATE_FILE; } } unless ($already_parsed) { + if (-e DATA_FILE) { + my $data = Storable::retrieve(DATA_FILE); + if ($data->[0] eq $timestamp) { + printf "Data retrieved from %s\n", DATA_FILE; + %fetched_uri = %{ $data->[1] }; + } else { + printf "Obsolete data file %s, regenerating\n", DATA_FILE; + 1 while unlink DATA_FILE; + } + } + parse_portage_tree(); print "\n"; + Storable::store([ $timestamp, \@not_on_cpan, + \@unfindable, + \@missing, \%name_mismatch, \%version, ] => STATE_FILE); - printf "Data stored to %s\n", STATE_FILE; + printf "State stored to %s\n", STATE_FILE; } print "\n"; @@ -193,6 +304,12 @@ p(0, "Summary\n"); p(1, "Not on the CPAN:\n"); p(2, "$_\n") for @not_on_cpan; +p(1, "Alleged to be on the CPAN, but unfindable:\n"); +p(2, "$_\n") for @unfindable; + +p(1, "Only a different version is on the CPAN:\n"); +p(2, "$_\n") for @missing; + p(1, "Name mismatch:\n"); for my $dist_name (sort keys %name_mismatch) { my $pkg_name = $name_mismatch{$dist_name};