利用者:Bcxfubot/ウェブアーカイブ/prog
表示
< 利用者:Bcxfubot | ウェブアーカイブ
# URL張替
# http://web.archive.org/web/
# ↓
# https://web.archive.org/web/
import re
import time
import pywikibot
# Earlier target prefix, kept for reference:
#target = "http://ameblo.jp/"
# URL prefix that replace_page() searches for in each line of page text.
target = "http://web.archive.org/web/"
# Replacement written in place of ``target``; also quoted in the edit summary.
target_https = "https://web.archive.org/web/"
# Number of loop iterations sub() performs per run, i.e. an upper bound on
# the number of pages processed.
# NOTE: this name shadows the built-in max() for the whole module.
#max = 10
#max = 10
max = 1
# Seconds passed to time.sleep() between consecutive pages in sub().
sleepsec = 60
######################################################
# Processing mode: replace_page() calls page.save() only when this is 1;
# with any other value the page is never saved.
#procmode = 0
procmode = 1
######################################################
def replace_page(pagetitle):
    """Rewrite ``target`` URL prefixes on one wiki page to ``target_https``.

    Loads the page named ``pagetitle`` from the site returned by
    ``pywikibot.Site()``, replaces every occurrence of the module-level
    ``target`` prefix with ``target_https`` line by line, and prints each
    rewritten line.  If at least one line changed, the new text is assigned
    to the page, and the page is saved only when the module-level
    ``procmode`` equals 1.

    Args:
        pagetitle: Title of the wiki page to process.

    Returns:
        None.
    """
    site = pywikibot.Site()
    page = pywikibot.Page(site, pagetitle)

    # Always 0: left over from a disabled "external links section" detector.
    # It is still printed so the log output format stays unchanged.
    gaibu = 0
    modified = False
    out_lines = []
    for line in page.text.split('\n'):
        if target in line:
            line = line.replace(target, target_https)
            print(gaibu, line)
            modified = True
        out_lines.append(line)

    if not modified:
        return

    # Re-join with '\n' (the separator the text was split on) instead of
    # appending '\n' after every line, so the rebuilt text does not gain a
    # spurious trailing newline.
    page.text = "\n".join(out_lines)
    if procmode == 1:
        page.save("外部リンクの修正 " + target_https + " ([[Wikipedia:Bot|Bot]]による編集)")
def get_pagetitle():
    """Return the next page title that still needs processing.

    Reads the work-list file ``list`` in the current working directory, one
    entry per line.  Lines ending in ``,sumi`` have already been marked as
    processed and are skipped.  Blank or whitespace-only lines are skipped
    too: returning one would hand the caller ``""``, which is reserved to
    mean "nothing left to do" and would hide every later entry.

    Returns:
        The first pending line with its trailing newline removed, or ``""``
        when the file contains no pending entry.
    """
    path = "list"
    with open(path) as f:
        for raw_line in f:
            title = raw_line.rstrip("\n")
            if not title.strip():
                # Not a page title; keep looking instead of signalling "done".
                continue
            if title.endswith(",sumi"):
                continue
            # Returned exactly as stored so done_pagetitle() can match the
            # line by equality.
            return title
    return ""
def done_pagetitle(pagetitle):
    """Mark ``pagetitle`` as processed in the work-list file.

    Rewrites the file ``list`` in the current working directory: every line
    that equals ``pagetitle`` exactly gets ``,sumi`` appended, and all other
    lines are kept as they are.  Every line is written back terminated by a
    newline.

    Args:
        pagetitle: The entry to mark, exactly as it appears in the file
            (without the trailing newline).

    Returns:
        Always the empty string.
    """
    path = "list"
    with open(path) as f:
        entries = [raw_line.rstrip("\n") for raw_line in f]

    # Collect the output lines and write them in one call instead of growing
    # a string with repeated ``+=``.
    updated = [
        entry + ",sumi\n" if entry == pagetitle else entry + "\n"
        for entry in entries
    ]
    with open(path, mode='w') as f:
        f.writelines(updated)
    return ""
def sub():
    """Process pending pages one at a time, up to ``max`` per run.

    On each iteration the next pending title is fetched with
    get_pagetitle(), a progress line is printed, the page is rewritten with
    replace_page() and the title is marked with done_pagetitle().  The loop
    stops early when get_pagetitle() returns ``""``.  After every iteration
    except the last allowed one, the function sleeps ``sleepsec`` seconds.
    """
    # ``max`` here is the module-level page limit, not the built-in.
    for attempt in range(1, max + 1):
        title = get_pagetitle()
        print("[{}/{}]:pagetitle={}".format(attempt, max, title))
        if title == "":
            break

        replace_page(title)
        done_pagetitle(title)

        # No pause after the final allowed iteration.
        if attempt < max:
            print("sleep({})".format(sleepsec))
            time.sleep(sleepsec)
def main():
    """Script entry point: run one batch via sub(), then print ``done.``."""
    sub()
    print("done.")


# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()