# Wiki page header (kept as a comment): 利用者:Mizusumashi/Bot/villagepump.py | 表示 | < 利用者:Mizusumashi | Bot
# -*- coding: utf-8 -*-
"""
This script is under public domain, and comes with ABSOLUTELY NO WARRANTY.
You can use/modify/redistribute without any permission.
このスクリプトの著作権は放棄され、このスクリプトはパブリックドメインにあります。
このスクリプトは完全に無保証です。このスクリプトの動作については全く何も保証されておらず、このスクリプトを使用したいかなる結果についても責任は負われません。
このスクリプトは、pywikipedia フレームワークを使用しています。
このスクリプトは、[[Wikipedia:井戸端]]から話題を除去し、サブページに複製します。
"""
import string, time, datetime, re
import wikipedia
#
# Utilities to escape title
#
# Replacement table used by escape(): each key is a single character that is
# substituted by the mapped value when a section heading is turned into a
# subpage title.
#
# The tab key is written as a real tab character.  It used to be the raw
# literal ur'\t', i.e. the two characters backslash + "t", which can never
# equal the single characters escape() looks up, so tabs were never replaced.
#
# NOTE(review): apart from tab and the square brackets, every entry maps a
# character to itself as the table stands here; the right-hand sides may have
# been full-width forms that were lost in transcription -- confirm against
# the deployed script.
ec = {
    u'\t': u' ',
    u'#': u'#',
    u'<': u'<',
    u'>': u'>',
    u'[': u'「',
    u']': u'」',
    u'|': u'|',
    u'{': u'{',
    u'}': u'}',
    u'/': u'/',
    u'(': u'(',
    u')': u')',
}
def escape(c):
    """Return the title-safe replacement for the single character `c`.

    Characters listed in the module-level `ec` table are replaced by their
    mapped value; every other character is returned unchanged.  A falsy
    argument (empty string or None) yields ''.
    """
    return ec.get(c, c) if c else ''
def comment(rest):
    """Skip the remainder of an HTML comment.

    `rest` is the text that follows an opening '<!--'.  Returns the text
    after the first '-->' (up to, but not including, the next newline), or
    '' when the comment is never closed or `rest` is empty.

    A single str.find replaces the previous character-by-character loop,
    which re-sliced the string and re-ran a regex at every position.
    """
    if not rest:
        return ''
    end = rest.find('-->')
    if end < 0:
        return ''
    # The former pattern captured with '.', which stops at a newline; keep
    # that by cutting the tail at the first line break.
    return rest[end + 3:].split('\n', 1)[0]
def nowiki(rest):
    """Consume the content of a <nowiki> element.

    `rest` is the text that follows an opening <nowiki> tag.  Returns a
    pair (inner, remainder): `inner` is the text before the closing
    </nowiki> tag with every character passed through escape(), and
    `remainder` is what follows the closing tag (up to the next newline).
    When no closing tag exists the whole of `rest` is treated as content
    and `remainder` is ''.

    One re.search locates the closing tag; the previous version re-sliced
    the string and re-ran the regex once per character.
    """
    rest = rest or ''
    closing = re.search(r'</\s*nowiki(?:\s+[^>]+)?>(.*)', rest)
    if closing:
        literal = rest[:closing.start()]
        remainder = closing.group(1)
    else:
        literal = rest
        remainder = ''
    return (''.join(escape(ch) for ch in literal), remainder)
def escapeTitle(s):
    """Reduce a section heading to text usable as a subpage title.

    The heading is consumed from left to right:

    * HTML comments and self-closing <nowiki/> tags are dropped;
    * the content of <nowiki>...</nowiki> is kept, each character passed
      through escape();
    * wiki links and bracketed links are replaced by their label, which is
      escaped recursively;
    * runs of three or two apostrophes (bold/italic markup) are dropped;
    * any other character is passed through escape().

    Returns the escaped title ('' for an empty heading).
    """
    # The pattern text is exactly that of the former raw-string literals;
    # it is spelled with doubled backslashes in plain u'' literals so the
    # source parses under both Python 2 and Python 3.
    comment_open = u'<!--(.*)'
    nowiki_empty = u'<\\s*nowiki(?:\\s+[^>]+)?/>(.*)'
    nowiki_open = u'<\\s*nowiki(?:\\s+[^>]+)?>(.*)'
    # Tried in this order: the more specific link forms must come first.
    labelled = (
        u'\\[\\[[^|]+\\|([^]]+)\\]\\](.*)',  # [[target|label]]
        u'\\[\\[([^]]+)\\]\\](.*)',          # [[target]]
        u'\\[\\S+ ([^]]+)\\](.*)',           # [url label]
        u'\\[([^]]+)\\](.*)',                # [text]
    )
    # Three apostrophes before two, so bold is not read as italic + "'".
    quotes = (
        u"\\'\\'\\'(.*)",
        u"\\'\\'(.*)",
    )

    def first_match(patterns, text):
        """Return the match of the first pattern that fits, else None."""
        for pattern in patterns:
            found = re.match(pattern, text)
            if found:
                return found
        return None

    pieces = []
    while s:
        found = re.match(comment_open, s)
        if found:
            s = comment(found.group(1))
            continue

        found = re.match(nowiki_empty, s)
        if found:
            s = found.group(1)
            continue

        found = re.match(nowiki_open, s)
        if found:
            literal, s = nowiki(found.group(1))
            pieces.append(literal)
            continue

        found = first_match(labelled, s)
        if found:
            pieces.append(escapeTitle(found.group(1)))
            s = found.group(2)
            continue

        found = first_match(quotes, s)
        if found:
            s = found.group(1)
            continue

        # Ordinary character: escape it and move on by one.
        pieces.append(escape(s[0]))
        s = s[1:]
    return ''.join(pieces)
#
# Bot
#
class Error(Exception):
    """Root of this script's own exception hierarchy (see ParamError)."""
    pass
class DateException(Exception):
    """Signals that an expected date could not be found.

    VillagePumpBot.getTagDate raises it, passing its own name as `msg`,
    when the header template's date is missing from a page's text.
    """

    def __init__(self, msg):
        # Short label describing where the lookup failed.
        self.msg = msg

    def __str__(self):
        prefix = 'Exception: '
        return prefix + self.msg
class VillagePumpBot(object):
    """Move village-pump topics to subpages and prune timed-out ones.

    One call to run() reads the target page, removes transclusions of
    subpages that have timed out, copies every level-2 section still
    written inline to a subpage of its own (leaving a transclusion
    template in its place), normalizes the layout of the transclusion
    templates, and saves the page if anything changed.
    """

    def __init__(self,
                 target = u'Wikipedia:井戸端',
                 subpagePrefix = u'Wikipedia:井戸端/subj/',
                 transcludionTemplate = u'井戸端サブページ',
                 headerTemplate = u'井戸端サブページ/ヘッダ',
                 changedComment = u'ロボットによる変更: ${edittime}(UTC)版より更新。',
                 subpagedComment = u'「${subpagePrefix}」配下へ分割(サブページ化)。',
                 removedComment = u'タイムアウトしたサブページの読み込み解除。',
                 styledComment = u'スタイル調整。',
                 subpagecomment = u'ロボットによる作成: '
                                  u'[[${target}]] '
                                  u'${edittime}(UTC)版より分割(サブページ化)。',
                 firstEditTimeOut = 30,
                 lastEditTimeOut = 10,
                 test = False):
        """Store the configuration and look up the wiki site.

        target               -- title of the page to maintain.
        subpagePrefix        -- prefix prepended to an escaped section title
                                to form the subpage title.
        transcludionTemplate -- name of the template that transcludes a
                                subpage into the target page.
        headerTemplate       -- name of the template written at the top of
                                each subpage; it carries the creation date.
        changedComment, subpagedComment, subpagecomment
                             -- string.Template texts for edit summaries.
        removedComment, styledComment
                             -- plain edit-summary fragments.
        firstEditTimeOut     -- days (passed to datetime.timedelta) counted
                                from the date in the subpage header.
        lastEditTimeOut      -- days counted from the subpage's last edit.
        test                 -- when true, pages are printed, not saved.
        """
        self.site = wikipedia.getSite()
        self.target = target
        self.subpagePrefix = subpagePrefix
        self.transcludionTemplate = transcludionTemplate
        self.headerTemplate = headerTemplate
        # The templated summaries are stored as their bound substitute()
        # so callers simply call them with keyword arguments.
        self.changedComment = string.Template(changedComment).substitute
        self.subpagedComment = string.Template(subpagedComment).substitute
        self.removedComment = removedComment
        self.styledComment = styledComment
        self.subpagecomment = string.Template(subpagecomment).substitute
        self.firstEditTimeOut = firstEditTimeOut
        self.lastEditTimeOut = lastEditTimeOut
        self.test = test

    def getSections(self, text):
        """Return match objects for the level-2 sections found in `text`.

        Each match has the groups 'title' and 'body'.  A body ends just
        before the next level-2 heading or the next line that starts with
        the transclusion template.
        """
        regex = (r'^==(?!=)\s*(?P<title>\S(.*\S)?)\s*==(?!=)\s*'
                 + r'^(?P<body>'
                 + r'(.|\n(?!\s*('
                 + r'(^==(?!=)\s*\S(.*\S)?\s*==(?!=))'
                 + r'|'
                 + r'^({{\s*((T|t)emplate:)?'
                 + re.escape(self.transcludionTemplate)
                 + r'[^}]*}})'
                 + r')))*'
                 + r')')
        return list(re.finditer(regex, text, re.UNICODE | re.MULTILINE))

    def getTranscludions(self, text):
        """Return match objects for every transclusion template in `text`.

        Group 0 is the whole template call; the group 'params' holds the
        raw text between the template name and the closing braces.
        """
        regex = (r'{{\s*'
                 + r'((T|t)emplate:)?'
                 + re.escape(self.transcludionTemplate)
                 + r'(?P<params>[^}]*)'
                 + r'}}')
        return list(re.finditer(regex, text, re.UNICODE | re.MULTILINE))

    def getParams(self, params):
        """Parse 'key = value' pairs out of raw template parameter text.

        Returns a dict; keys and values are stripped of surrounding
        whitespace, and a repeated key keeps its last value.
        """
        regex = (r'(?P<key>[^\s|=]([^|=]*[^\s|=])?)'
                 + r'\s*=\s*'
                 + r'(?P<value>[^\s|=]([^|=]*[^\s|=])?)')
        return dict((found.group('key'), found.group('value'))
                    for found in re.finditer(regex, params, re.UNICODE))

    def putPage(self, page, text, comment):
        """Save `text` to `page`, or just print it when in test mode."""
        if self.test:
            print('***TEST***')
            print('title: >>' + page.title() + '<<')
            print('comment: >>' + comment + '<<')
            print('text: >>' + text + '<<')
            return
        page.put(text, comment)

    class UTC(datetime.tzinfo):
        """Fixed zero-offset tzinfo used to build aware datetimes."""

        def utcoffset(self, dt):
            return datetime.timedelta(0)

        def dst(self, dt):
            return datetime.timedelta(0)

        def tzname(self, dt):
            return "UTC"

    def getEditDate(self, page):
        """Return the time of `page`'s latest edit as an aware datetime.

        page.editTime() is parsed as a 14-digit YYYYMMDDhhmmss stamp.
        Raises DateException when the stamp does not have that form
        (previously this surfaced as an AttributeError on None).
        """
        regex = (r'(?P<year>\d{4})'
                 + r'(?P<month>\d{2})'
                 + r'(?P<day>\d{2})'
                 + r'(?P<hour>\d{2})'
                 + r'(?P<min>\d{2})'
                 + r'(?P<sec>\d{2})')
        match = re.match(regex, page.editTime())
        if not match:
            raise DateException('getEditDate')
        return datetime.datetime(int(match.group('year')),
                                 int(match.group('month')),
                                 int(match.group('day')),
                                 int(match.group('hour')),
                                 int(match.group('min')),
                                 int(match.group('sec')),
                                 0, self.UTC())

    def getTagDate(self, text):
        """Return the date recorded in the header template of `text`.

        The template's 'date' parameter is read as 'Y-m-d H:M'; anything
        after the minutes is ignored.  Raises DateException when no such
        template call is found.
        """
        regex = (r'{{\s*'
                 + r'((T|t)emplate:)?'
                 + re.escape(self.headerTemplate)
                 + r'\s*\|\s*date\s*=\s*'
                 + r'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})\s+'
                 + r'(?P<hour>\d{1,2}):(?P<min>\d{1,2})'
                 + r'[^}]*}}')
        match = re.search(regex, text, re.UNICODE | re.MULTILINE)
        if not match:
            raise DateException('getTagDate')
        return datetime.datetime(int(match.group('year')),
                                 int(match.group('month')),
                                 int(match.group('day')),
                                 int(match.group('hour')),
                                 int(match.group('min')),
                                 0, 0, self.UTC())

    def sleep(self):
        """Wait for the time reported by pywikipedia's put throttle.

        Waits of one second or less are skipped.
        """
        throttle = wikipedia.put_throttle
        throttle.lock.acquire()
        try:
            waittime = throttle.waittime()
            if waittime > 1:
                time.sleep(waittime)
        finally:
            throttle.lock.release()

    def printEscapedTitle(self, text):
        """Show each section title next to its escaped form, then pause.

        Gives the operator a chance to abort before any subpage is
        created under a badly escaped title.
        """
        wikipedia.output('Escaped Tiltes:')
        sections = self.getSections(text)
        for match in sections:
            title = match.group('title')
            wikipedia.output(' original tilte : ' + title)
            wikipedia.output(' escaped tilte : ' + escapeTitle(title))
        if not sections:
            wikipedia.output(" There is no section title to create subpage.")
        elif len(sections) == 1:
            wikipedia.output(" There is one title of section to create subpage.")
            wikipedia.output(" If escaped title is wrong, kill this process.")
        else:
            wikipedia.output(" There is some titles of sections to create subpages.")
            wikipedia.output(" If escaped title is wrong, kill this process.")
        wikipedia.input('Press any key to continue...')

    def removeIncludions(self, text):
        """Remove transclusions of subpages that have timed out.

        A transclusion is kept only while the subpage was edited within
        lastEditTimeOut days AND the date in its header is less than
        firstEditTimeOut days old.  Transclusions whose subpage cannot be
        determined, does not exist, or carries no readable header date are
        reported and left untouched.

        Returns (new_text, removed) where `removed` tells whether at
        least one transclusion was taken out.
        """
        removed = False
        now = datetime.datetime.now(self.UTC())
        lastLimit = datetime.timedelta(self.lastEditTimeOut)
        firstLimit = datetime.timedelta(self.firstEditTimeOut)
        for match in self.getTranscludions(text):
            transcludion = match.group(0)
            params = self.getParams(match.group('params'))
            if 'subpage' in params:
                subpage = params['subpage']
            elif 'title' in params:
                subpage = self.subpagePrefix + params['title']
            else:
                wikipedia.output('ERROR: no params \'subpage\' or \'title\'.')
                continue
            page = wikipedia.Page(self.site, subpage)
            if not page.exists():
                wikipedia.output('ERROR: subpage does not exist: ' + subpage)
                continue
            if now < self.getEditDate(page) + lastLimit:
                # Recently edited: keep it unless the topic itself is old.
                try:
                    firstEdit = self.getTagDate(page.get())
                except DateException:
                    wikipedia.output('Exception: tag\'s date does not match: ' + subpage)
                    continue
                if now < firstEdit + firstLimit:
                    wikipedia.output('.')
                    continue
            wikipedia.output('Removing includion [[' + subpage + ']]...')
            text = re.sub(r'\s*' + re.escape(transcludion) + r'\s*', '\n', text)
            removed = True
        return text, removed

    def _createSubpage(self, subpage, body, comment):
        """Create `subpage` with `body` unless it exists; return success."""
        self.sleep()
        page = wikipedia.Page(self.site, subpage)
        if page.exists():
            return False
        self.putPage(page, body, comment)
        return True

    def _replaceBlock(self, text, old, new):
        """Replace `old` and the whitespace around it by `new` on own lines.

        The replacement is handed to re.sub as a function so that
        backslashes in `new` (titles are user input) are inserted
        literally instead of being read as escapes or group references.
        """
        replacement = '\n' + new + '\n'
        return re.sub(r'\s*' + re.escape(old) + r'\s*',
                      lambda found: replacement,
                      text)

    def transToSubpages(self, text, edit):
        """Move every inline level-2 section of `text` to a subpage.

        `edit` is the datetime of the revision being processed; it is
        quoted in the subpage's creation summary.  The subpage title is
        subpagePrefix + the escaped section title; if that page already
        exists, the current UTC date is appended, and after that
        '-0', '-1', ... until an unused title is found.

        Returns (new_text, subpaged) where `subpaged` tells whether at
        least one section was moved.
        """
        subpaged = False
        comment = self.subpagecomment(target = self.target,
                                      edittime = edit.strftime('%Y-%m-%d %H:%M:%S'))
        datePostfix = ' ' + time.strftime(u"%Y%m%d", time.gmtime())
        for match in self.getSections(text):
            section, title, body = match.group(0, 'title', 'body')
            body = ('<noinclude>{{\n'
                    + ' ' + self.headerTemplate + '\n'
                    + ' | date = {{subst:#time:Y-m-d H:i:s}}\n'
                    + '}}</noinclude>\n'
                    + '== ' + title + '==\n'
                    + body)
            subpageMain = self.subpagePrefix + escapeTitle(title)
            subpage = subpageMain
            if not self._createSubpage(subpage, body, comment):
                # Title taken: retry with the date, then with a counter.
                # (These assignments used to go to a misspelled variable,
                # so the loop below could never terminate.)
                subpage = subpageMain + datePostfix
                i = 0
                while not self._createSubpage(subpage, body, comment):
                    subpage = subpageMain + datePostfix + '-%d' % i
                    i += 1
            if subpage == self.subpagePrefix + title:
                new = '{{ ' + self.transcludionTemplate + ' | title = ' + title + ' }}'
            else:
                # The page title differs from the heading, so the template
                # needs an explicit 'subpage' parameter.
                wikipedia.output('title is changed: \''
                                 + title
                                 + '\' -> \''
                                 + subpage
                                 + '\'')
                new = ('{{\n'
                       + ' ' + self.transcludionTemplate + '\n'
                       + ' | title = ' + title + '\n'
                       + ' | subpage = ' + subpage + '\n'
                       + '}}')
            text = self._replaceBlock(text, section, new)
            subpaged = True
        return text, subpaged

    def doStyle(self, text):
        """Rewrite every transclusion template in a canonical layout.

        Templates with no or one parameter go on a single line; templates
        with more are spread over several lines, keeping only the 'title'
        and 'subpage' parameters.  Returns (new_text, changed).
        """
        oldText = text
        for match in self.getTranscludions(text):
            old = match.group(0)
            params = self.getParams(match.group('params'))
            if len(params) == 0:
                new = '{{ %s }}' % self.transcludionTemplate
            elif len(params) == 1:
                key, value = params.popitem()
                new = '{{ %s | %s = %s }}' \
                      % (self.transcludionTemplate, key, value)
            else:
                new = '{{\n ' + self.transcludionTemplate + '\n'
                if 'title' in params:
                    new += ' | title = %s\n' % params['title']
                if 'subpage' in params:
                    new += ' | subpage = %s\n' % params['subpage']
                new += '}}'
            text = self._replaceBlock(text, old, new)
        return text, (oldText != text)

    def run(self):
        """Process the target page once and save it if anything changed."""
        if self.test:
            wikipedia.output('***** TEST MODE ******')
        page = wikipedia.Page(self.site, self.target)
        text = page.get()
        edit = self.getEditDate(page)
        self.printEscapedTitle(text)
        text, removed = self.removeIncludions(text)
        text, subpaged = self.transToSubpages(text, edit)
        text, styled = self.doStyle(text)
        if not (removed or subpaged or styled):
            wikipedia.output("Not removed, Not subpaged, Not styled.")
            return
        # Build the edit summary from the parts that actually happened.
        comment = self.changedComment(edittime = edit.strftime('%Y-%m-%d %H:%M:%S'))
        if subpaged:
            comment += self.subpagedComment(subpagePrefix = self.subpagePrefix)
        if removed:
            comment += self.removedComment
        if styled:
            comment += self.styledComment
        self.putPage(page, text, comment)
#
# Main function for front end
#
class ParamError(Error):
    """Base class for configuration-module and command-line errors.

    The script's entry point catches this type and prints the message
    instead of a traceback.
    """
    pass
class ConfigNotExist(ParamError):
    """A configuration module requested by name could not be imported."""

    def __init__(self, name):
        # Name of the module that failed to import.
        self.name = name

    def __str__(self):
        prefix = 'Error: module does not exist: '
        return prefix + self.name
class ConfigNotStr(ParamError):
    """A configuration value that must be a string has another type.

    param  -- name of the offending configuration variable.
    module -- the configuration module it was read from.
    """

    def __init__(self, param, module):
        self.param = param
        self.module = module

    def __str__(self):
        # main() passes the imported module object, which exposes
        # `__name__` rather than `name`; reading `.name` raised an
        # AttributeError while formatting.  Objects that do carry a
        # `name` attribute are still honoured.
        moduleName = getattr(self.module, 'name', None) \
            or getattr(self.module, '__name__', self.module)
        return 'Error: config parameter is not string: %s in %s' % (self.param, moduleName)
class ConfigNotNum(ParamError):
    """A configuration value that must be a number has another type.

    param  -- name of the offending configuration variable.
    module -- the configuration module it was read from.
    """

    def __init__(self, param, module):
        self.param = param
        self.module = module

    def __str__(self):
        # main() passes the imported module object, which exposes
        # `__name__` rather than `name`; reading `.name` raised an
        # AttributeError while formatting.  Objects that do carry a
        # `name` attribute are still honoured.
        moduleName = getattr(self.module, 'name', None) \
            or getattr(self.module, '__name__', self.module)
        return 'Error: config parameter is not number: %s in %s' % (self.param, moduleName)
class ConfigNotBool(ParamError):
    """A configuration value that must be a bool has another type.

    param  -- name of the offending configuration variable.
    module -- the configuration module it was read from.
    """

    def __init__(self, param, module):
        self.param = param
        self.module = module

    def __str__(self):
        # main() passes the imported module object, which exposes
        # `__name__` rather than `name`; reading `.name` raised an
        # AttributeError while formatting.  Objects that do carry a
        # `name` attribute are still honoured.
        moduleName = getattr(self.module, 'name', None) \
            or getattr(self.module, '__name__', self.module)
        return 'Error: config parameter is not bool: %s in %s' % (self.param, moduleName)
class ConfigUnknown(ParamError):
    """A configuration module defines a variable the script does not know.

    param  -- name of the unknown configuration variable.
    module -- the configuration module it was read from.
    """

    def __init__(self, param, module):
        self.param = param
        self.module = module

    def __str__(self):
        # Two defects made this message impossible to format: the second
        # placeholder was typed '&s' (one placeholder for two values, so
        # '%' raised TypeError), and the module name was read from
        # `.name`, which imported modules do not have (`__name__` is the
        # attribute they carry).
        moduleName = getattr(self.module, 'name', None) \
            or getattr(self.module, '__name__', self.module)
        return 'Error: config parameter is unknown: %s in %s' % (self.param, moduleName)
class CommandLineParamDuplication(ParamError):
    """The same command-line option was given more than once."""

    def __init__(self, param):
        # Name of the repeated option.
        self.param = param

    def __str__(self):
        prefix = 'Error: command line parameter is set twice: '
        return prefix + self.param
class CommandLineParamNotNum(ParamError):
    """A numeric command-line option was given a non-numeric value."""

    def __init__(self, param):
        # Name of the offending option.
        self.param = param

    def __str__(self):
        prefix = 'Error: command line parameter is not number: '
        return prefix + self.param
class CommandLineParamNotBool(ParamError):
    """A boolean command-line option was given an unrecognized value."""

    def __init__(self, param):
        # Name of the offending option.
        self.param = param

    def __str__(self):
        prefix = 'Error: command line parameter is not bool: '
        return prefix + self.param
class CommandLineParamUnknown(ParamError):
    """A command-line argument matched none of the known options."""

    def __init__(self, param):
        # The unrecognized argument as given on the command line.
        self.param = param

    def __str__(self):
        prefix = 'Error: command line parameter is unknown: '
        return prefix + self.param
def main():
    """Collect the bot's settings, then build and run a VillagePumpBot.

    Settings come from, in this order (later sources override earlier
    ones):

    1. the optional module `villagepump_config`, if it can be imported;
    2. the command line, processed left to right: `-config:MODULE` loads
       another configuration module (which must exist), `-NAME:VALUE`
       sets a single setting.

    Raises a ParamError subclass for a missing configuration module, a
    setting of the wrong type, a setting given twice on the command line,
    or an unknown argument.
    """
    # Settings grouped by the type they must have.
    strParams = ('target', 'subpagePrefix', 'transcludionTemplate',
                 'headerTemplate', 'changedComment', 'subpagedComment',
                 'removedComment', 'styledComment', 'subpagecomment')
    numParams = ('firstEditTimeOut', 'lastEditTimeOut')
    boolParams = ('test',)
    # u'FALSE' is accepted to mirror u'TRUE'.
    trueWords = (u'TRUE', u'True', u'true',
                 u'ON', u'On', u'on',
                 u'YES', u'Yes', u'yes')
    falseWords = (u'FALSE', u'False', u'false',
                  u'OFF', u'Off', u'off',
                  u'NO', u'No', u'no')

    params = {}

    def checkConfigs(name, ignoreNotExist):
        """Copy the known settings of configuration module `name`."""
        try:
            module = __import__(name)
            reload(module)
        except ImportError:
            if ignoreNotExist:
                return
            raise ConfigNotExist(name)
        for key, value in list(module.__dict__.items()):
            if key in strParams:
                if not isinstance(value, (str, unicode)):
                    raise ConfigNotStr(key, module)
                params[key] = unicode(value)
            elif key in numParams:
                # bool is a subclass of int but is not a valid number here.
                if isinstance(value, bool) \
                   or not isinstance(value, (int, float)):
                    raise ConfigNotNum(key, module)
                params[key] = value
            elif key in boolParams:
                if not isinstance(value, bool):
                    raise ConfigNotBool(key, module)
                params[key] = value
            # Any other module attribute (imports, dunder names, helper
            # variables) is ignored.

    def splitOption(arg, names):
        """Return (name, value) if `arg` is '-name:value' for a known name.

        The name is taken from `names`, not from `arg`, so it is always
        one of the literal setting names.  Returns (None, None) when no
        name fits.
        """
        for name in names:
            prefix = '-' + name + ':'
            if arg.startswith(prefix):
                return name, arg[len(prefix):]
        return None, None

    def toNumber(name, text):
        """Convert `text` to int, else float, else raise for `name`."""
        for convert in (int, float):
            try:
                return convert(text)
            except ValueError:
                pass
        raise CommandLineParamNotNum(name)

    def toBool(name, text):
        """Convert one of the accepted yes/no words to a bool."""
        if text in trueWords:
            return True
        if text in falseWords:
            return False
        raise CommandLineParamNotBool(name)

    checkConfigs('villagepump_config', True)

    seen = set()
    options = ('config',) + strParams + numParams + boolParams
    for arg in wikipedia.handleArgs():
        name, value = splitOption(arg, options)
        if name is None:
            raise CommandLineParamUnknown(arg)
        if name == 'config':
            checkConfigs(value, False)
            continue
        if name in seen:
            raise CommandLineParamDuplication(name)
        seen.add(name)
        value = unicode(value)
        if name in numParams:
            params[name] = toNumber(name, value)
        elif name in boolParams:
            params[name] = toBool(name, value)
        else:
            params[name] = value

    bot = VillagePumpBot(**params)
    bot.run()
# Script entry point: run the bot only when this file is executed directly.
if __name__ == '__main__':
    try:
        main()
    except ParamError, e:
        # Configuration and command-line mistakes are reported as a plain
        # message; any other exception propagates after the cleanup below.
        wikipedia.output(str(e))
    finally:
        # Always runs, whether main() succeeded or failed.
        # NOTE(review): presumably tells pywikipedia this bot has finished
        # (throttle bookkeeping) -- confirm against the framework docs.
        wikipedia.stopme()