コンテンツにスキップ

英文维基 | 中文维基 | 日文维基 | 草榴社区

利用者:Bcxfubot/exblog/prog

# URL replacement: switch exblog.jp external links from http to https
# [http://AAAA.exblog.jp/
# ↓
# [https://AAAA.exblog.jp/
import re
import time
import pywikibot

# Regex matching an http:// URL on any exblog.jp subdomain.
# Written as a raw string so that "\." reaches the regex engine as typed
# instead of being an invalid string escape sequence.
target_re = r"http://[^\.]+\.exblog\.jp"
# Upper bound on the number of pages handled by one run.
# NOTE: this name shadows the builtin max(); it is kept because the rest of
# the script reads it under this name.
max = 1
# Seconds to wait between two pages.
sleepsec = 60

######################################################
# Processing mode
#   0: dry run - modified lines are printed, nothing is saved
#   1: the modified page is saved
procmode = 0
#procmode = 1
######################################################

def _upgrade_exblog_links(text):
    """Rewrite http:// exblog links to https:// in wikitext.

    Only lines at or after the first heading matching "== 外部リンク"
    (external links) are considered; once that heading has been seen, every
    following line is eligible.

    Returns a ``(new_text, changed)`` tuple where ``changed`` is True when at
    least one URL matched ``target_re``.
    """
    in_external_links = False
    changed = False
    out_lines = []
    for line in text.split('\n'):
        if re.search("==[ ]*外部リンク", line):
            in_external_links = True
        if in_external_links and re.search(target_re, line):
            # A URL ends at a space, "|" or "]" (wiki link / template syntax).
            for url in re.findall(r"http://[^ \|\]]+", line):
                if re.search(target_re, url):
                    # NOTE(review): this rewrites every "http:" inside the
                    # matched URL, including one embedded in an archive-style
                    # URL - confirm that is intended.
                    line = line.replace(url, url.replace("http:", "https:"))
                    print(1, line)
                    changed = True
        out_lines.append(line)
    # Joining (rather than appending "\n" after every line) reproduces the
    # original text exactly, without an extra trailing newline.
    return "\n".join(out_lines), changed


def replace_page(pagetitle):
    """Upgrade exblog links on one wiki page from http to https.

    The page named ``pagetitle`` is loaded from the default pywikibot site.
    When at least one link was rewritten the page text is updated, and it is
    saved only when the module-level ``procmode`` is 1; otherwise the run is
    a dry run that just prints the modified lines.
    """
    site = pywikibot.Site()
    page = pywikibot.Page(site, pagetitle)

    new_text, changed = _upgrade_exblog_links(page.text)
    if not changed:
        return

    page.text = new_text
    if procmode == 1:
        # The summary used to be built from an undefined name (target_https),
        # which raised NameError exactly when a save was attempted.
        page.save("外部リンクの修正 http:// -> https:// (exblog.jp) ([[Wikipedia:Bot|Bot]]による編集)")

def get_pagetitle():
    """Return one page title that still has to be processed.

    Reads the file ``list`` in the current directory line by line and returns
    the first non-blank line that does not end with ",sumi" (the marker for
    an already processed entry). Returns "" when no such line exists.
    """
    path = "list"
    with open(path) as f:
        for s_line in f:
            s_line = s_line.rstrip("\n")
            # A blank line is not a title. Returning it would be
            # indistinguishable from "nothing left" and would hide every
            # entry that follows it.
            if not s_line.strip():
                continue
            if not s_line.endswith(",sumi"):
                return s_line
    return ""

def done_pagetitle(pagetitle):
    """Mark ``pagetitle`` as processed in the ``list`` file.

    Every line that is exactly equal to ``pagetitle`` gets ",sumi" appended;
    all other lines are kept as they are. The file is rewritten in place with
    one newline after each line. Always returns "".
    """
    path = "list"
    entries = []
    with open(path) as src:
        for raw in src:
            entry = raw.rstrip("\n")
            entries.append(entry + ",sumi" if entry == pagetitle else entry)
    with open(path, mode='w') as dst:
        dst.write("".join(entry + "\n" for entry in entries))
    return ""

def sub():
    """Process up to ``max`` pending page titles, pausing between pages.

    For each iteration the next pending title is fetched, the page is
    rewritten and the title is marked as done. The loop stops early when no
    pending title is left. After every page except the one in the final
    iteration the script sleeps for ``sleepsec`` seconds.
    """
    final_index = max - 1
    for index in range(max):
        title = get_pagetitle()
        print("[{}/{}]:pagetitle={}".format(index + 1, max, title))
        if title == "":
            break

        replace_page(title)
        done_pagetitle(title)

        # No pause is needed after the last iteration of the run.
        if index >= final_index:
            continue
        print("sleep({})".format(sleepsec))
        time.sleep(sleepsec)

def main():
    """Script entry point: run the batch, then report completion."""
    sub()
    print("done.")


if __name__ == "__main__":
    main()