X-Git-Url: http://git.vpit.fr/?a=blobdiff_plain;f=samples%2Fgengentooisms;h=0c39c00ddce471865d57e1c9a1874a4bec0d0d51;hb=961a3a0b428269fd404fc461fb07d9b3f45dd745;hp=b554f9bb2916da14a4dd4778f7adc8fef4e5c88e;hpb=2eb39130ee05208fc41d7c671a4050d64799c00b;p=perl%2Fmodules%2FCPANPLUS-Dist-Gentoo.git diff --git a/samples/gengentooisms b/samples/gengentooisms index b554f9b..0c39c00 100755 --- a/samples/gengentooisms +++ b/samples/gengentooisms @@ -1,63 +1,394 @@ #!/usr/bin/env perl -# This scrit is meant to guess gentooisms by looking into the portage tree. -# A really good one would use the CPANPLUS API to check if the dist name -# candidates are really on CPAN. - use strict; use warnings; use Fatal; -use List::Util qw/max/; - -use constant PORTAGE => '/usr/portage'; - -my %gentooism; - -for my $category (qw/perl-core dev-perl/) { - my $dir = PORTAGE . '/' . $category; - chdir $dir; - for my $name (<*>) { - next unless -d $name; - my $eb = (sort glob "$dir/$name/$name-*")[-1]; - open my $fh, '<', $eb; - my ($pn, $on_cpan); - while (<$fh>) { - $on_cpan = 1 if not defined $on_cpan - and /(?:MODULE_AUTHOR|SRC_URI=.*?(?i:cpan))/; - if (not defined $pn and /_PN?=(.*)/) { - $pn = $1; - if ($pn =~ /^\s*["']?\s*\$\{PN?\}/) { - undef $pn; - next; +use File::Spec; +use File::Copy qw; +use List::Util qw; +use Storable (); +use Term::ANSIColor; + +use CPAN::DistnameInfo 0.11; + +use Capture::Tiny qw; +use LWP::UserAgent; +use Parse::CPAN::Packages::Fast; + +use lib 'lib'; +use CPANPLUS::Dist::Gentoo::Atom; +use CPANPLUS::Dist::Gentoo::Maps; + +use constant PACKAGES => File::Spec->catdir( + $ENV{HOME}, '.cpanplus', '02packages.details.txt.gz' +); +use constant CPAN_MIRROR => 'http://www.cpan.org/'; +use constant PORTAGE => '/usr/portage'; +use constant TARGET => 'lib/CPANPLUS/Dist/Gentoo/Maps.pm'; +use constant BACKUP => TARGET . '.bak'; +use constant DATA_FILE => 'gentooisms.data.sto'; +use constant STATE_FILE => 'gentooisms.state.sto'; + +my %is_on_cpan = ( + 'Audio-CD-disc-cover' => 0, + 'Video-Frequencies' => 0, + 'Sphinx-Search' => 1, + 'WattsUp-Daemon' => 1, +); + +sub p { + my ($indent, $fmt, @args) = @_; + $fmt = (' ' x ($indent * 3)) . $fmt; + printf $fmt, @args; +} + +my $timestamp = CPANPLUS::Dist::Gentoo::Maps::get_portage_timestamp(PORTAGE); +die 'Could not read the timestamp from the portage tree ' . PORTAGE . "\n" + unless defined $timestamp; + +{ + my $ua; + + sub cpan_http_test { + my ($path) = @_; + + unless (defined $ua) { + $ua = LWP::UserAgent->new; + $ua->agent('CPANPLUS::Dist::Gentoo gentooisms generator/1.0'); + } + + my $r = $ua->head(CPAN_MIRROR . $path); + + return $r && $r->code == 200; + } +} + +my %fetched_uri; +my (@not_on_cpan, @unfindable, @missing, %name_mismatch, %version); + +sub parse_portage_tree { + my $pcp = Parse::CPAN::Packages::Fast->new(PACKAGES); + + for my $category (qw) { + p(0, "Browsing the $category category.\n"); + + my $cat_dir = File::Spec->catdir(PORTAGE, $category); + + for my $pkg_dir (glob File::Spec->catdir($cat_dir, '*')) { + next unless -d $pkg_dir; + + my $pkg_name = (File::Spec->splitdir($pkg_dir))[-1]; + + my $last = reduce { $a->[1] > $b->[1] ? $a : $b } + grep $_->[1] != 9999, + map [ $_, CPANPLUS::Dist::Gentoo::Atom->new_from_ebuild($_) ], + glob File::Spec->catfile($pkg_dir, "$pkg_name-*"); + my ($ebuild, $atom) = @$last; + p(1, "%s/%s-%s\n", map $atom->$_, qw); + + if (exists $is_on_cpan{$pkg_name} and not $is_on_cpan{$pkg_name}) { + p(2, colored("$pkg_name is not a CPAN distribution (forced)", 'bright_red') + . "\n"); + push @not_on_cpan, "$category/$pkg_name"; + next; + } + + my $uri; + if (exists $fetched_uri{$ebuild}) { + $uri = $fetched_uri{$ebuild}; + } else { + my @cmd = ('ebuild', $ebuild, 'help', '--debug'); + my ($ret, $code); + (undef, my $err) = capture { + $ret = system { $cmd[0] } @cmd; + $code = $?; + }; + if ($ret != 0 or $code == -1 or $code & 127 or $code >> 8) { + die "system(\"@cmd\") returned $ret and/or failed with status $code"; } - $pn =~ s!\$[{(][^/]*?[})]!!g; - $pn =~ s!\$\{P?V.*?\}!!g; - $pn =~ s/^\s*["']?\s*-*\s*//; - $pn =~ s/\s*-*\s*["']?\s*$//; - $pn =~ s/-\d+\..*//; - if ($pn =~ m!\$\{PN?(/.*?/(?:.*/?)?)\}!) { - my $s = $1; - $s .= '/' if $s =~ tr!/!! <= 2; - eval "(\$pn = \$name) =~ s$s"; + + my %map; + while ($err =~ /([a-zA-Z0-9_]+)=((['"]).*?\3|\S+)/gs) { + my $key = $1; + my $val = $2; + $val =~ s{^(['"])(.*?)\1$}{$2}s; + $map{$key} = $val; } + + $uri = $map{SRC_URI}; + unless (defined $uri) { + my $author = $map{MODULE_AUTHOR}; + if (defined $author) { + my ($au, $a) = $author =~ /^((.).)/; + my $dist_version = $map{MODULE_VERSION}; + $dist_version = $last->[1] unless defined $dist_version; + $uri = "mirror://cpan/$a/$au/$author/$pkg_name/$dist_version.tar.gz"; + } + } + + $fetched_uri{$ebuild} = $uri; + Storable::store([ + $timestamp, + \%fetched_uri, + ] => DATA_FILE); } - } - if ($pn and $pn ne $name) { - if ($on_cpan) { - $gentooism{$pn} = $name; - } else { - print STDERR "'$pn' => '$name' may not be on CPAN\n"; + + my ($fqn_dist, $path); + if (defined $uri) { + if ($uri =~ m{cpan.*?/id/(\S+)}) { + $fqn_dist = $1; + $path = "authors/id/$fqn_dist"; + } elsif ($uri =~ m{mirror://cpan/(\S+)}) { + $path = $1; + } elsif ($uri =~ m{/([^/\s]+)(?:\s|$)}) { + my $archive = $1; + my ($top_level) = $archive =~ /^([^-]+)/; + $path = "modules/by-module/$top_level/$archive"; + } + } + + unless (defined $path) { + p(2, "doesn't seem to be fetching its tarball from a CPAN mirror\n"); + p(2, colored("$pkg_name is not a CPAN distribution", 'bright_red') . "\n"); + push @not_on_cpan, "$category/$pkg_name"; + next; + } + + my $dist; + if (defined $fqn_dist) { + p(2, "is $fqn_dist indexed on the CPAN... "); + $dist = do { + local $@; + eval { $pcp->distribution($fqn_dist) } + }; + print defined $dist ? "yes\n" : "no\n"; + } + + unless (defined $dist) { + p(2, "can $path be found on a CPAN mirror... "); + if (cpan_http_test($path)) { + print "yes\n"; + $dist = CPAN::DistnameInfo->new($path); + } else { + print "no\n"; + } + } + + my ($pseudo_dist, $latest_dist); + + unless (defined $dist) { + $path =~ m{([^/\s]+)$} or die 'Could not get the last part of the path'; + my $archive = $1; + $pseudo_dist = CPAN::DistnameInfo->new($archive); + + p(2, 'is ' . $pseudo_dist->dist . ' the name of a CPAN distribution... '); + $latest_dist = do { + local $@; + eval { $pcp->latest_distribution($pseudo_dist->dist) }; + }; + + unless (defined $latest_dist) { + print "no\n"; + (my $mod_name = $pkg_name) =~ s/-/::/g; + p(2, "is $mod_name indexed in another CPAN distribution... "); + $latest_dist = do { + local $@; + eval { + my $module = $pcp->package($mod_name); + defined $module ? $module->distribution : undef; + }; + }; + if (defined $latest_dist) { + # Re-forge the pseudo dist so that it will pick up the correct dist + # name when looking for a mismatch. + $pseudo_dist = CPAN::DistnameInfo->new( + $latest_dist->dist . '-' . $pseudo_dist->version + . '.' . $pseudo_dist->extension + ); + } + } + + my ($latest_file, $latest_author); + if (defined $latest_dist) { + $latest_file = $latest_dist->filename; + $latest_author = $latest_dist->cpanid; + printf "yes, in %s by %s\n", + $latest_file, + (defined $latest_author ? $latest_author : 'unknown'); + } else { + print "no\n"; + } + + if (defined $latest_author) { + my ($au, $a) = $latest_author =~ /^((.).)/ or die 'Author name too short'; + p(2, "is $archive in that author\'s CPAN directory... "); + my $alternate_path = "authors/id/$a/$au/$latest_author/$archive"; + if ($alternate_path eq $path) { + print "already checked\n"; + } elsif (cpan_http_test($alternate_path)) { + $dist = CPAN::DistnameInfo->new($alternate_path); + print "yes\n"; + } else { + print "no\n"; + } + unless (defined $dist) { + push @missing, + "$category/$pkg_name (latest is $latest_file by $latest_author)"; + } + } } + + unless (defined $dist) { + if ($latest_dist or $is_on_cpan{$pkg_name}) { + $dist = $pseudo_dist; + p(2, "seems to be a CPAN distribution"); + if ($latest_dist) { + print "\n"; + } else { + # Implies $is_on_cpan{$pkg_name} + print " (forced)\n"; + push @unfindable, "$category/$pkg_name"; + } + } else { + p(2, colored("$pkg_name is not a CPAN distribution", 'bright_red') . "\n"); + push @not_on_cpan, "$category/$pkg_name"; + next; + } + } + + my $dist_name = $dist->dist; + if ($dist_name ne $pkg_name) { + p(2, colored("$dist_name => $pkg_name", 'bright_yellow') . "\n"); + $name_mismatch{$dist_name} = $pkg_name; + } + + my $pkg_version = $atom->version . ''; + $pkg_version =~ s/-r\d+$//; + my $dist_version = $dist->version; + my $mapped_version = CPANPLUS::Dist::Gentoo::Maps::version_c2g( + undef, # default conversion + $dist_version, + ); + if ($mapped_version ne $pkg_version) { + my $str = "$dist_version => $mapped_version != $pkg_version"; + p(2, colored($str, 'bright_cyan') . "\n"); + } + $version{$dist_name} = [ $dist_version => $pkg_version ]; } } } -my $max = max map length, keys %gentooism; +my $already_parsed = 0; + +if (-e STATE_FILE) { + my $state = Storable::retrieve(STATE_FILE); + if ($state->[0] == $timestamp) { + printf "State retrieved from %s\n", STATE_FILE; + @not_on_cpan = @{ $state->[1] }; + @unfindable = @{ $state->[2] }; + @missing = @{ $state->[3] }; + %name_mismatch = %{ $state->[4] }; + %version = %{ $state->[5] }; + $already_parsed = 1; + } else { + printf "Obsolete state file %s, regenerating\n", STATE_FILE; + 1 while unlink STATE_FILE; + } +} -print STDERR +(keys %gentooism) . " gentooisms found\n"; +unless ($already_parsed) { + if (-e DATA_FILE) { + my $data = Storable::retrieve(DATA_FILE); + if ($data->[0] == $timestamp) { + printf "Data retrieved from %s\n", DATA_FILE; + %fetched_uri = %{ $data->[1] }; + } else { + printf "Obsolete data file %s, regenerating\n", DATA_FILE; + 1 while unlink DATA_FILE; + } + } + + parse_portage_tree(); + print "\n"; + + Storable::store([ + $timestamp, + \@not_on_cpan, + \@unfindable, + \@missing, + \%name_mismatch, + \%version, + ] => STATE_FILE); + printf "State stored to %s\n", STATE_FILE; +} + +print "\n"; +p(0, "Summary\n"); + +p(1, "Not on the CPAN:\n"); +p(2, "$_\n") for @not_on_cpan; + +p(1, "Alleged to be on the CPAN, but unfindable:\n"); +p(2, "$_\n") for @unfindable; + +p(1, "Only a different version is on the CPAN:\n"); +p(2, "$_\n") for @missing; + +p(1, "Name mismatch:\n"); +for my $dist_name (sort keys %name_mismatch) { + my $pkg_name = $name_mismatch{$dist_name}; + my $mapped_name = CPANPLUS::Dist::Gentoo::Maps::name_c2g($dist_name); + + my $fixed = $mapped_name eq $pkg_name; + my $eq = $fixed ? '==' : '!='; + my $str = colored( + "$dist_name => $mapped_name $eq $pkg_name", + $fixed ? 'bright_green' : 'bright_red' + ); + p(2, "$str\n"); +} + +p(1, "Version mismatch:\n"); +for (sort keys %version) { + my ($dist_version, $pkg_version) = @{$version{$_}}; + my $default_mapped_version = CPANPLUS::Dist::Gentoo::Maps::version_c2g( + undef, + $dist_version, + ); + my $mapped_version = CPANPLUS::Dist::Gentoo::Maps::version_c2g( + $_, + $dist_version, + ); + if ($default_mapped_version ne $pkg_version) { + my $fixed = $mapped_version eq $pkg_version; + my $eq = $fixed ? '==' : '!='; + my $str = colored( + "$dist_version => $mapped_version $eq $pkg_version", + $fixed ? 'bright_green' : 'bright_red' + ); + p(2, "$_: $str\n"); + } +} + +copy TARGET, BACKUP or die "copy failed: $!"; + +open my $src, '<', BACKUP; +open my $dst, '>', TARGET; + +my $max = max map length, keys %name_mismatch; + +SRC: while (<$src>) { + if (/^sub TIMESTAMP/) { + print $dst "sub TIMESTAMP () { $timestamp }\n"; + } elsif (/^__DATA__$/) { + print $dst "__DATA__\n"; + printf $dst "%s%s %s\n", $_, (' ' x ($max - length)), $name_mismatch{$_} + for sort keys %name_mismatch; + last SRC; + } else { + print $dst $_; + } +} -print "my %gentooism = (\n"; -printf " '%s'%s => '%s',\n", $_, (' ' x ($max - length)), $gentooism{$_} - for sort keys %gentooism; -print ");\n"; +print "\n" . +(keys %name_mismatch) . " name mismatches found\n";