X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FCPANPLUS-Dist-Gentoo.git;a=blobdiff_plain;f=samples%2Fgengentooisms;h=34d5869fea17a58492a11f0ba268c76641fd3465;hp=b554f9bb2916da14a4dd4778f7adc8fef4e5c88e;hb=ac553cd6107b4c4470b19f1b77a73889cd604659;hpb=2eb39130ee05208fc41d7c671a4050d64799c00b diff --git a/samples/gengentooisms b/samples/gengentooisms index b554f9b..34d5869 100755 --- a/samples/gengentooisms +++ b/samples/gengentooisms @@ -1,63 +1,354 @@ #!/usr/bin/env perl -# This scrit is meant to guess gentooisms by looking into the portage tree. -# A really good one would use the CPANPLUS API to check if the dist name -# candidates are really on CPAN. - use strict; use warnings; use Fatal; -use List::Util qw/max/; - -use constant PORTAGE => '/usr/portage'; - -my %gentooism; - -for my $category (qw/perl-core dev-perl/) { - my $dir = PORTAGE . '/' . $category; - chdir $dir; - for my $name (<*>) { - next unless -d $name; - my $eb = (sort glob "$dir/$name/$name-*")[-1]; - open my $fh, '<', $eb; - my ($pn, $on_cpan); - while (<$fh>) { - $on_cpan = 1 if not defined $on_cpan - and /(?:MODULE_AUTHOR|SRC_URI=.*?(?i:cpan))/; - if (not defined $pn and /_PN?=(.*)/) { - $pn = $1; - if ($pn =~ /^\s*["']?\s*\$\{PN?\}/) { - undef $pn; - next; +use File::Spec; +use File::Copy qw/copy/; +use List::Util qw/max reduce/; +use Storable (); +use Term::ANSIColor; + +use CPAN::DistnameInfo 0.11; + +use Capture::Tiny qw/capture/; +use LWP::UserAgent; +use Parse::CPAN::Packages::Fast; + +use lib 'lib'; +use CPANPLUS::Dist::Gentoo::Atom; +use CPANPLUS::Dist::Gentoo::Maps; + +use constant PACKAGES => File::Spec->catdir( + $ENV{HOME}, '.cpanplus', '02packages.details.txt.gz' +); +use constant CPAN_MIRROR => 'http://www.cpan.org/'; +use constant PORTAGE => '/usr/portage'; +use constant TARGET => 'lib/CPANPLUS/Dist/Gentoo/Maps.pm'; +use constant BACKUP => TARGET . '.bak'; +use constant DATA_FILE => 'gentooisms.data.sto'; +use constant STATE_FILE => 'gentooisms.state.sto'; + +my %is_on_cpan = ( + 'Audio-CD-disc-cover' => 0, + 'WattsUp-Daemon' => 1, +); + +sub p { + my ($indent, $fmt, @args) = @_; + $fmt = (' ' x ($indent * 3)) . $fmt; + printf $fmt, @args; +} + +sub timestamp { + my $tm = File::Spec->catfile(PORTAGE, 'metadata', 'timestamp.chk'); + return unless -e $tm; + open my $fh, '<', $tm; + local $/; + <$fh>; +} + +my $timestamp = timestamp(); + +{ + my $ua; + + sub cpan_http_test { + my ($path) = @_; + + unless (defined $ua) { + $ua = LWP::UserAgent->new; + $ua->agent('CPANPLUS::Dist::Gentoo gentooisms generator/1.0'); + } + + my $r = $ua->head(CPAN_MIRROR . $path); + + return $r && $r->code == 200; + } +} + +my %fetched_uri; +my (@not_on_cpan, @unfindable, @missing, %name_mismatch, %version); + +sub parse_portage_tree { + my $pcp = Parse::CPAN::Packages::Fast->new(PACKAGES); + + for my $category (qw/perl-core dev-perl/) { + p(0, "Browsing the $category category.\n"); + + my $cat_dir = File::Spec->catdir(PORTAGE, $category); + + for my $pkg_dir (glob File::Spec->catdir($cat_dir, '*')) { + next unless -d $pkg_dir; + + my $pkg_name = (File::Spec->splitdir($pkg_dir))[-1]; + + my $last = reduce { $a->[1] > $b->[1] ? $a : $b } + map [ $_, CPANPLUS::Dist::Gentoo::Atom->new_from_ebuild($_) ], + glob File::Spec->catfile($pkg_dir, "$pkg_name-*"); + my ($ebuild, $atom) = @$last; + p(1, "%s/%s-%s\n", map $atom->$_, qw/category name version/); + + if (exists $is_on_cpan{$pkg_name} and not $is_on_cpan{$pkg_name}) { + p(2, colored("$pkg_name is not a CPAN distribution (forced)", 'bright_red') + . "\n"); + push @not_on_cpan, "$category/$pkg_name"; + next; + } + + my $uri; + if (exists $fetched_uri{$ebuild}) { + $uri = $fetched_uri{$ebuild}; + } else { + my @cmd = ('ebuild', $ebuild, 'help', '--debug'); + my ($ret, $code); + (undef, my $err) = capture { + $ret = system { $cmd[0] } @cmd; + $code = $?; + }; + if ($ret != 0 or $code == -1 or $code & 127 or $code >> 8) { + die "system(\"@cmd\") returned $ret and/or failed with status $code"; } - $pn =~ s!\$[{(][^/]*?[})]!!g; - $pn =~ s!\$\{P?V.*?\}!!g; - $pn =~ s/^\s*["']?\s*-*\s*//; - $pn =~ s/\s*-*\s*["']?\s*$//; - $pn =~ s/-\d+\..*//; - if ($pn =~ m!\$\{PN?(/.*?/(?:.*/?)?)\}!) { - my $s = $1; - $s .= '/' if $s =~ tr!/!! <= 2; - eval "(\$pn = \$name) =~ s$s"; + + while ($err =~ /SRC_URI=((['"]).*?\2|\S+)/gs) { + $uri = $1; + $uri =~ s{^(['"])(.*?)\1$}{$2}s; } + $fetched_uri{$ebuild} = $uri; + Storable::store([ + $timestamp, + \%fetched_uri, + ] => DATA_FILE); } - } - if ($pn and $pn ne $name) { - if ($on_cpan) { - $gentooism{$pn} = $name; - } else { - print STDERR "'$pn' => '$name' may not be on CPAN\n"; + + my ($fqn_dist, $path); + if (defined $uri) { + if ($uri =~ m{cpan.*?/id/(\S+)}) { + $fqn_dist = $1; + $path = "authors/id/$fqn_dist"; + $is_on_cpan{$pkg_name} = 1; + } elsif ($uri =~ m{mirror://cpan/(\S+)}) { + $path = $1; + $is_on_cpan{$pkg_name} = 1; + } elsif ($uri =~ m{/([^/\s]+)(?:\s|$)}) { + my $archive = $1; + my ($top_level) = $archive =~ /^([^-]+)/; + $path = "modules/by-module/$top_level/$archive"; + } } + + unless (defined $path) { + p(2, "doesn't seem to be fetching its tarball from a CPAN mirror\n"); + p(2, colored("$pkg_name is not a CPAN distribution", 'bright_red') . "\n"); + push @not_on_cpan, "$category/$pkg_name"; + next; + } + p(2, "fetches $path\n"); + + my $dist; + if (defined $fqn_dist) { + p(2, 'is indexed on the CPAN... '); + $dist = do { + local $@; + eval { $pcp->distribution($fqn_dist) } + }; + print defined $dist ? "yes\n" : "no\n"; + } + + unless (defined $dist) { + p(2, 'can directly be found on a CPAN mirror... '); + if (cpan_http_test($path)) { + print "yes\n"; + $dist = CPAN::DistnameInfo->new($path); + } else { + print "no\n"; + } + } + + my ($pseudo_dist, $latest_dist); + + unless (defined $dist) { + p(2, 'has the same name as a distribution on the CPAN... '); + $path =~ m{([^/\s]+)$} or die 'Could not get the last part of the path'; + my $archive = $1; + $pseudo_dist = CPAN::DistnameInfo->new($archive); + $latest_dist = do { + local $@; + eval { $pcp->latest_distribution($pseudo_dist->dist) }; + }; + my ($latest_file, $latest_author); + if (defined $latest_dist) { + $latest_file = $latest_dist->filename; + $latest_author = $latest_dist->cpanid; + printf "yes, %s by %s\n", + $latest_file, + (defined $latest_author ? $latest_author : 'unknown'); + } else { + print "no\n"; + } + + if (defined $latest_author) { + my ($au, $a) = $latest_author =~ /^((.).)/ or die 'Author name too short'; + p(2, 'is in that author\'s CPAN directory... '); + my $alternate_path = "authors/id/$a/$au/$latest_author/$archive"; + if ($alternate_path eq $path) { + print "already checked\n"; + } elsif (cpan_http_test($alternate_path)) { + $dist = CPAN::DistnameInfo->new($alternate_path); + print "yes\n"; + } else { + print "no\n"; + } + unless (defined $dist) { + push @missing, + "$category/$pkg_name (latest is $latest_file by $latest_author)"; + } + } + } + + unless (defined $dist) { + if ($latest_dist or $is_on_cpan{$pkg_name}) { + $dist = $pseudo_dist; + unless ($latest_dist) { + push @unfindable, "$category/$pkg_name"; + } + p(2, "seems to come from the CPAN anyway\n"); + } else { + p(2, colored("$pkg_name is not a CPAN distribution", 'bright_red') . "\n"); + push @not_on_cpan, "$category/$pkg_name"; + next; + } + } + + my $dist_name = $dist->dist; + if ($dist_name ne $pkg_name) { + p(2, colored("$dist_name => $pkg_name", 'bright_yellow') . "\n"); + $name_mismatch{$dist_name} = $pkg_name; + } + + my $pkg_version = $atom->version . ''; + $pkg_version =~ s/-r\d+$//; + my $dist_version = $dist->version; + my $mapped_version = CPANPLUS::Dist::Gentoo::Maps::version_c2g( + undef, # default conversion + $dist_version, + ); + if ($mapped_version ne $pkg_version) { + my $str = "$dist_version => $mapped_version != $pkg_version"; + p(2, colored($str, 'bright_cyan') . "\n"); + } + $version{$dist_name} = [ $dist_version => $pkg_version ]; + } + } +} + +my $already_parsed = 0; + +if (-e STATE_FILE) { + my $state = Storable::retrieve(STATE_FILE); + if ($state->[0] eq $timestamp) { + printf "State retrieved from %s\n", STATE_FILE; + @not_on_cpan = @{ $state->[1] }; + @unfindable = @{ $state->[2] }; + @missing = @{ $state->[3] }; + %name_mismatch = %{ $state->[4] }; + %version = %{ $state->[5] }; + $already_parsed = 1; + } else { + printf "Obsolete state file %s, regenerating\n", STATE_FILE; + 1 while unlink STATE_FILE; + } +} + +unless ($already_parsed) { + if (-e DATA_FILE) { + my $data = Storable::retrieve(DATA_FILE); + if ($data->[0] eq $timestamp) { + printf "Data retrieved from %s\n", DATA_FILE; + %fetched_uri = %{ $data->[1] }; + } else { + printf "Obsolete data file %s, regenerating\n", DATA_FILE; + 1 while unlink DATA_FILE; } } + + parse_portage_tree(); + print "\n"; + + Storable::store([ + $timestamp, + \@not_on_cpan, + \@unfindable, + \@missing, + \%name_mismatch, + \%version, + ] => STATE_FILE); + printf "State stored to %s\n", STATE_FILE; +} + +print "\n"; +p(0, "Summary\n"); + +p(1, "Not on the CPAN:\n"); +p(2, "$_\n") for @not_on_cpan; + +p(1, "Alleged to be on the CPAN, but unfindable:\n"); +p(2, "$_\n") for @unfindable; + +p(1, "Only a different version is on the CPAN:\n"); +p(2, "$_\n") for @missing; + +p(1, "Name mismatch:\n"); +for my $dist_name (sort keys %name_mismatch) { + my $pkg_name = $name_mismatch{$dist_name}; + my $mapped_name = CPANPLUS::Dist::Gentoo::Maps::name_c2g($dist_name); + + my $fixed = $mapped_name eq $pkg_name; + my $eq = $fixed ? '==' : '!='; + my $str = colored( + "$dist_name => $mapped_name $eq $pkg_name", + $fixed ? 'bright_green' : 'bright_red' + ); + p(2, "$str\n"); +} + +p(1, "Version mismatch:\n"); +for (sort keys %version) { + my ($dist_version, $pkg_version) = @{$version{$_}}; + my $default_mapped_version = CPANPLUS::Dist::Gentoo::Maps::version_c2g( + undef, + $dist_version, + ); + my $mapped_version = CPANPLUS::Dist::Gentoo::Maps::version_c2g( + $_, + $dist_version, + ); + if ($default_mapped_version ne $pkg_version) { + my $fixed = $mapped_version eq $pkg_version; + my $eq = $fixed ? '==' : '!='; + my $str = colored( + "$dist_version => $mapped_version $eq $pkg_version", + $fixed ? 'bright_green' : 'bright_red' + ); + p(2, "$_: $str\n"); + } } -my $max = max map length, keys %gentooism; +copy TARGET, BACKUP or die "copy failed: $!"; + +open my $src, '<', BACKUP; +open my $dst, '>', TARGET; + +my $max = max map length, keys %name_mismatch; -print STDERR +(keys %gentooism) . " gentooisms found\n"; +SRC: while (<$src>) { + print $dst $_; + if (/^__DATA__$/) { + printf $dst "%s%s %s\n", $_, (' ' x ($max - length)), $name_mismatch{$_} + for sort keys %name_mismatch; + last SRC; + } +} -print "my %gentooism = (\n"; -printf " '%s'%s => '%s',\n", $_, (' ' x ($max - length)), $gentooism{$_} - for sort keys %gentooism; -print ");\n"; +print "\n" . +(keys %name_mismatch) . " name mismatches found\n";