利用者:Bcxfubot/BOT作業依頼/log/20210222/chousa1/prog
表示
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Encode;
use LWP::Simple;
use LWP::UserAgent;
#use LWP::Protocol::https
#binmode( STDOUT, ":utf8" );
binmode( STDOUT, ":encoding(UTF-8)" );
#use Jcode;
use Getopt::Std;
require 'subs_my.pl';
require 'all_check.pl';
require 'mini_check.pl';
# Disable buffering on standard output.
$| = 1;
# Command-line switches; filled in by the getopts() call at the bottom of the file.
our %opts = ();
# Package-level flag that main() resets to 0 at the end of every page.
# NOTE(review): nothing visible in this file reads or sets it otherwise —
# presumably it is used by the required helper files; confirm.
our $incomment = 0;
#------------------------------------------------------------------
# sub
#------------------------------------------------------------------
# Render a string as its encoded bytes in hexadecimal.
#
# Arguments: the character string, then the name of the encoding to apply
#            (passed straight to Encode::encode).
# Returns:   the bytes as lowercase two-digit hex pairs separated by single
#            spaces, e.g. "AB" in UTF-8 gives "41 42"; '' for an empty string.
sub str_to_byte
{
    my ( $text, $charset ) = @_;

    # Work on the encoded octets so multi-byte characters yield several pairs.
    my $octets = encode( $charset, $text );

    # '(H2)*' splits the octet string into one hex pair per byte.
    return join( ' ', unpack( '(H2)*', $octets ) );
}
#------------------------------------------------------------------
# main
#------------------------------------------------------------------
# Scan an XML dump made of <page> elements (the <title>/<id>/<ns>/<comment>/
# <sha1>/xml:space="preserve" layout this code looks for matches a MediaWiki
# export) and report every text line that matches the search pattern.
#
# Input:   $ARGV[0] - path of the dump file, read as UTF-8.
# Output:  echoes the dump path, then for every matching line prints
#              * [[:TITLE]]
#              *: <nowiki>[LINE]</nowiki>
# Options: with -c ($opts{'c'} == 1) no pattern search is done; instead the raw
#          lines of each page (<page> through </page>) are collected and passed
#          to all_check().
# Dies when no dump path is given or the dump cannot be opened.
#
# is_avoid_page() and all_check() are not defined in this file; they are
# presumably provided by the files pulled in with require.
sub main {
    my $dumpfile = $ARGV[0];
    die "usage: $0 [-c] dumpfile\n" unless defined $dumpfile;
    print $dumpfile, "\n";

    open( my $dump, "<:encoding(UTF-8)", $dumpfile )
        or die "cannot open $dumpfile: $!\n";

    my $check_mode = ( defined $opts{'c'} && $opts{'c'} == 1 ) ? 1 : 0;

    # Pattern reported by this run.  Earlier search patterns, kept for reference:
    #   /https?:\/\/web\.archive\.org\/2/
    #   /(<[^\>]+<)/
    #   /(<r[^\>]+<)/
    #   /<refname/
    #   /http:\/\/ameblo\.jp\/.*\/day/
    my $needle = qr/ディアゴスティーニ/;

    # Named XML entities to decode in page text.  Everything is decoded in a
    # single pass so that "&" produced from an escaped ampersand is never
    # decoded a second time.
    my %char_for = (
        lt   => '<',
        gt   => '>',
        quot => '"',
        apos => "'",
        amp  => '&',
    );

    my @page     = ();    # raw lines of the current page (used in -c mode)
    my $title    = "";
    my $id       = "";
    my $ns       = "";
    my $inpage   = 0;     # 1 while between <page> and </page>
    my $passpage = 0;     # 1 when is_avoid_page() rejected the current title

    while ( my $line = <$dump> ) {

        # Outside a page only the opening tag is of interest.
        if ( $inpage == 0 ) {
            if ( $line =~ /<page>/ ) {
                $inpage = 1;
                push( @page, $line );
            }
            next;
        }

        push( @page, $line ) if $check_mode;

        # End of page: run the full check if requested, then reset all state.
        if ( $line =~ /<\/page>/ ) {
            all_check(@page) if $check_mode;
            $inpage    = 0;
            @page      = ();
            $incomment = 0;
            $passpage  = 0;
            $title     = "";
            $id        = "";
            $ns        = "";
            next;
        }

        # Only the first <title>, <id> and <ns> of a page are consumed here;
        # later <id> lines (once $id is set) fall through as ordinary lines.
        if ( $title eq "" && $line =~ /<title>(.*)<\/title>/ ) {
            $title = $1;
            $passpage = 1 if is_avoid_page($title);

            # Disabled experiments used to flag titles matching /BOOWY/,
            # /BOØWY/ or /^Template:/ at this point.
            next;
        }
        if ( $id eq "" && $line =~ /<id>(.*)<\/id>/ ) {
            $id = $1;
            next;
        }
        if ( $ns eq "" && $line =~ /<ns>(.*)<\/ns>/ ) {
            $ns = $1;
            next;
        }

        # Revision metadata lines are never searched.
        next if $line =~ /<comment>/;
        next if $line =~ /<sha1>.*<\/sha1>/;
        next if $line =~ /<\/comment>/;

        next if $passpage == 1;
        next if $check_mode;

        # Strip everything up to and including the opening of the text element.
        $line =~ s/.*xml:space="preserve">//;

        # Decode XML entities so the pattern sees the text as written.  The
        # numeric apostrophe form goes first: it cannot be created by the named
        # pass below, whereas the reverse order would decode "&amp;#39;" twice.
        $line =~ s/&#0*39;/'/g;
        $line =~ s/&(lt|gt|quot|apos|amp);/$char_for{$1}/g;
        chomp($line);

        if ( $line =~ $needle ) {
            print "* [[:$title]]\n";
            print "*: <nowiki>[$line]</nowiki>\n";
        }
    }
    close($dump);
}
# Parse the command line (-c is the only switch; the result lands in %opts),
# then run the scan.
getopts( 'c', \%opts );
main();
# EOF