利用者:VZP10224/citation-watchlist.js
/* Per-wiki configuration */
const LANGUAGE = 'ja';
const FAMILY = 'wikipedia';
const actionApiEndpoint = `https://${LANGUAGE}.${FAMILY}.org/w/api.php`;
const restApiEndpoint = `https://api.wikimedia.org/core/v1`;
const publicSuffixList = "Wikipedia:Citation_Watchlist/Public_Suffix_List"; // on-wiki copy of the Public Suffix List
const listOfLists = "Wikipedia:Citation_Watchlist/Lists"; // index page linking to the domain list pages
const msgWarning = "Warning";
const msgCaution = "Caution";
const msgInspect = "Inspect";
const warnEmoji = '\u2757'; // ❗ heavy exclamation mark
const cautionEmoji = '\u270B'; // ✋ raised hand
const inspectEmoji = '\uD83D\uDD0E'; // 🔎 right-pointing magnifying glass (U+1F50E)
const warnSectionHeader = "==Warn==";
const cautionSectionHeader = "==Caution==";
const inspectSectionHeader = "==Inspect==";
const delayMs = 50; // minimum spacing between consecutive Action API requests
const maxRequestsPerHour = 400; // self-imposed cap below the REST API's 500/hour limit
/*
Citation Watchlist Script – Highlights watchlist entries when questionable sources are added
author: Hacks/Hackers
license: GPL 4.0
*/
let publicSuffixSet = new Set();
let warnList = new Set();
let cautionList = new Set();
let inspectList = new Set();
let lastRequestTime = 0;
// The Wikimedia REST API has a hard request limit of 500 per hour, and no clear
// way to batch these requests. As such, we need to track our requests, and to do
// so globally across the whole session (not just a single instantiation of the
// script).
if (!localStorage.getItem('citationWatchlistRestApiRequestCount')) {
localStorage.setItem('citationWatchlistRestApiRequestCount', '0');
}
setInterval(() => {
localStorage.setItem('citationWatchlistRestApiRequestCount', '0');
console.log("Request count reset");
}, 3600000);
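// Note: this interval only runs while a page using the script stays open, so
// the hourly reset approximates the API's rolling one-hour window rather than
// tracking it exactly.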
function getRequestCount() {
const count = parseInt(localStorage.getItem('citationWatchlistRestApiRequestCount'), 10);
return isNaN(count) ? 0 : count;
}
function incrementRequestCount() {
const currentCount = getRequestCount();
localStorage.setItem('citationWatchlistRestApiRequestCount', (currentCount + 1).toString());
console.log(`Request count incremented to ${currentCount + 1}`);
}
function prependEmojiWithTooltip(element, emoji, domains, tooltipText) {
let processedType = '';
if (emoji === warnEmoji) {
processedType = 'warn';
} else if (emoji === cautionEmoji) {
processedType = 'caution';
} else if (emoji === inspectEmoji) {
processedType = 'inspect';
} else {
console.error('Unsupported emoji type');
return;
}
if (element.getAttribute(`data-processed-${processedType}`) === 'true') {
return;
}
const emojiSpan = document.createElement('span');
emojiSpan.textContent = emoji + " ";
emojiSpan.title = tooltipText + ": " + domains.join(", ");
element.parentNode.insertBefore(emojiSpan, element);
element.setAttribute(`data-processed-${processedType}`, 'true');
}
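// Illustrative usage: prependEmojiWithTooltip(el, warnEmoji, ['example.com'], msgWarning)
// inserts <span title="Warning: example.com">❗ </span> before el and marks el
// with data-processed-warn so repeat runs skip it.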
async function parseWatchlist() {
// Select all containers of the watchlist links to process them individually
const entriesContainers = document.querySelectorAll('.mw-changeslist-links');
const revisions = [];
const revisionIds = [];
let linkCounter = 0;
// Build map of previous revision IDs
for (const container of entriesContainers) {
const prevLink = container.querySelector('a.mw-history-histlinks-previous');
if (prevLink) {
// Parse the query string via URL; passing the whole href to URLSearchParams
// would treat the entire URL as a query string and mangle the first parameter.
const urlParams = new URL(prevLink.href).searchParams;
revisionIds.push(urlParams.get('oldid'));
}
}
console.log('Revision IDs for parent lookup:', revisionIds);
const previousRevisionMap = await fetchPreviousRevisionIds(revisionIds);
for (const container of entriesContainers) {
const diffLink = container.querySelector('a.mw-changeslist-diff');
const histLink = container.querySelector('a.mw-changeslist-history');
const prevLink = container.querySelector('a.mw-history-histlinks-previous');
const curLink = container.querySelector('a.mw-history-histlinks-current');
if (diffLink) {
// First, check whether we are in recent changes / the watchlist.
// If a "diff" link is found, process it. The newer revision (the "diff"
// parameter) is stored as oldrevision because it becomes the "from" side
// of the REST compare call in fetchDiffAndProcess.
linkCounter += 1;
const urlParams = new URL(diffLink.href).searchParams;
revisions.push({
oldrevision: urlParams.get('diff'),
newrevision: urlParams.get('oldid'),
element: diffLink.parentNode.parentNode
});
} else if (histLink) {
// If no "diff" link is found but a "hist" link is, process the "hist" link
linkCounter += 1;
const urlParams = new URL(histLink.href).searchParams;
const pageID = urlParams.get('curid');
const firstID = await fetchFirstRevisionId(pageID);
revisions.push({
oldrevision: firstID,
element: histLink.parentNode.parentNode
});
} else if (prevLink) {
// At this point, check if we are on a page history rather than watchlist
linkCounter += 1;
const urlParams = new URL(prevLink.href).searchParams;
revisions.push({
oldrevision: urlParams.get('oldid'),
newrevision: previousRevisionMap[urlParams.get('oldid')],
element: prevLink.parentNode.parentNode
});
} else if (curLink) {
// No prev link means we are at the page's first revision. We do not want
// to compare against the current revision, so we extract the oldid and
// treat it like a new page.
linkCounter += 1;
const urlParams = new URL(curLink.href).searchParams;
revisions.push({
oldrevision: urlParams.get('oldid'),
element: curLink.parentNode.parentNode
});
}
}
// Finally, if no links of any kind were found, we are on a page history
// with only one revision. Fetch the first (and only) revision ID via the
// page ID from mw.config.
if (linkCounter === 0) {
const pageID = mw.config.get('wgArticleId');
const firstID = await fetchFirstRevisionId(pageID);
revisions.push({
oldrevision: firstID,
element: entriesContainers[0]
});
}
return revisions;
}
function delay(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
function buildURL(params) {
const url = new URL(actionApiEndpoint);
Object.keys(params).forEach(key => url.searchParams.append(key, params[key]));
return url;
}
function getRootDomain(hostname, publicSuffixSet) {
const domainParts = hostname.split('.');
for (let i = 0; i < domainParts.length; i++) {
const candidate = domainParts.slice(i).join('.');
// PSL exception rules ("!" prefix) mark the candidate itself as registrable.
if (publicSuffixSet.has(`!${candidate}`)) {
return candidate;
}
if (publicSuffixSet.has(candidate)) {
// The registrable domain is the matched suffix plus one preceding label,
// when one exists. Wildcard rules ("*.") in the PSL are not handled here.
return i > 0 ? domainParts.slice(i - 1).join('.') : hostname;
}
}
return hostname;
}
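// Worked example: with "co.uk" in the suffix set, getRootDomain("news.example.co.uk", set)
// tests "news.example.co.uk" and "example.co.uk", matches "co.uk", and returns
// "example.co.uk" (the registrable domain).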
function extractAddedURLs(addedParts) {
const addedURLs = [];
const urlRegex = /https?:\/\/[^\s<"]+/g;
let match;
while ((match = urlRegex.exec(addedParts)) !== null) {
try {
const url = new URL(match[0]);
addedURLs.push(url.href);
} catch (error) {
console.error(`Invalid URL rejected: ${match[0]}`);
}
}
return addedURLs;
}
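// Example: extractAddedURLs('cite <a href="https://example.com/page">x</a>')
// returns ["https://example.com/page"]; matches that fail URL parsing are
// logged and skipped.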
async function fetchFromActionAPI(params) {
const url = buildURL(params);
console.log(`Action API request: ${url}`);
const now = Date.now();
const elapsed = now - lastRequestTime;
if (elapsed < delayMs) {
await delay(delayMs - elapsed);
}
lastRequestTime = Date.now();
try {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Network response was not ok: ${response.statusText}`);
}
return await response.json();
} catch (error) {
console.error('Error fetching data from MediaWiki API:', error);
throw error;
}
}
async function fetchPublicSuffixList() {
const pslUrl = `https://${LANGUAGE}.${FAMILY}.org/wiki/${publicSuffixList}?action=raw`;
console.log(`Raw page text request: ${pslUrl}`);
try {
const response = await fetch(pslUrl);
const content = await response.text();
const suffixSet = new Set();
const lines = content.split('\n');
for (const line of lines) {
if (line.trim() && !line.trim().startsWith('//')) {
suffixSet.add(line.trim());
}
}
return suffixSet;
} catch (error) {
console.error("Error fetching Public Suffix List:", error);
return new Set();
}
}
async function fetchDiffFromAPI(apiUrl) {
while (getRequestCount() >= maxRequestsPerHour) {
console.warn("Request limit reached, waiting for reset...");
await delay(3600000); // wait out the hour; the reset interval zeroes the counter
}
incrementRequestCount();
console.log(`Diff API request: ${apiUrl} (Request count: ${getRequestCount()})`);
try {
const response = await fetch(apiUrl);
const data = await response.json();
return data["source"] || data["diff"];
} catch (error) {
console.error('Error fetching API content:', error);
return null;
}
}
async function fetchDiffAndProcess(revisions) {
for (const revision of revisions) {
let apiUrl = `${restApiEndpoint}/${FAMILY}/${LANGUAGE}/revision/${revision.oldrevision}`;
if (revision.newrevision !== undefined) {
apiUrl += `/compare/${revision.newrevision}`;
}
const diff = await fetchDiffFromAPI(apiUrl);
let addedURLs = [];
if (Array.isArray(diff)) { // actual diffs are arrays; new pages are strings
// Types 2 and 4 represent "from" lines; types 1 and 5 represent "to" lines.
// Type 3 represents changes within a line; extracting URL changes from
// those is harder, so such lines are skipped here.
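// Illustrative shape of one diff line from the REST compare endpoint (fields
// assumed from their usage below; other fields omitted):
//   { "type": 1, "lineNumber": 7, "text": "...https://example.com/cite..." }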
let fromURLs = [];
let toURLs = [];
for (const diffLine of diff) {
const lineURLs = extractAddedURLs(diffLine.text);
for (const lineURL of lineURLs) { // renamed from URL to avoid shadowing the global constructor
if (diffLine.type === 2 || diffLine.type === 4) {
fromURLs.push(lineURL);
} else if (diffLine.type === 1 || diffLine.type === 5) {
toURLs.push(lineURL);
}
}
}
const toURLSet = new Set(toURLs);
// Since the newer revision is the "from" side of the compare (see
// parseWatchlist), URLs in "from" but not in "to" are the additions.
addedURLs = fromURLs.filter(url => !toURLSet.has(url));
} else if (typeof diff === 'string') { // new page: scan the full source text
addedURLs = extractAddedURLs(diff);
}
console.log(`Old revision: ${revision.oldrevision}
New revision: ${revision.newrevision}
API URL: ${apiUrl}
Revision element: ${revision.element.innerHTML}
Added URLs: ${addedURLs.join(' ')}
`);
const matchedWarnDomains = [];
const matchedCautionDomains = [];
const matchedInspectDomains = [];
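// Lists are checked in order of severity, so a domain appearing on multiple
// lists is flagged only at its most severe level.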
for (const url of addedURLs) {
const hostname = new URL(url).hostname;
const domain = getRootDomain(hostname, publicSuffixSet);
if (warnList.has(domain) && !matchedWarnDomains.includes(domain)) {
matchedWarnDomains.push(domain);
} else if (cautionList.has(domain) && !matchedCautionDomains.includes(domain)) {
matchedCautionDomains.push(domain);
} else if (inspectList.has(domain) && !matchedInspectDomains.includes(domain)) {
matchedInspectDomains.push(domain);
}
}
if (matchedWarnDomains.length > 0) {
prependEmojiWithTooltip(revision.element, warnEmoji, matchedWarnDomains, msgWarning);
}
if (matchedCautionDomains.length > 0) {
prependEmojiWithTooltip(revision.element, cautionEmoji, matchedCautionDomains, msgCaution);
}
if (matchedInspectDomains.length > 0) {
prependEmojiWithTooltip(revision.element, inspectEmoji, matchedInspectDomains, msgInspect);
}
}
}
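// A domain list page is expected to look like this (illustrative):
//   ==Warn==
//   * unreliable.example
//   ==Caution==
//   * questionable.example
//   ==Inspect==
//   * checkme.example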
async function fetchAndOrganizeDomainLists(pageNames) {
const params = {
action: 'query',
prop: 'revisions',
titles: pageNames.join('|'), // Join all page names
rvprop: 'content',
rvslots: '*',
format: 'json',
origin: '*'
};
try {
const data = await fetchFromActionAPI(params);
const pages = data.query.pages;
const warnList = new Set();
const cautionList = new Set();
const inspectList = new Set();
for (const pageId in pages) {
const revisions = pages[pageId].revisions;
if (!revisions) continue; // skip missing or inaccessible pages
const content = revisions[0].slots.main['*'];
let currentList = null;
const lines = content.split('\n');
for (let line of lines) {
if (line.trim() === warnSectionHeader) {
currentList = warnList;
} else if (line.trim() === cautionSectionHeader) {
currentList = cautionList;
} else if (line.trim() === inspectSectionHeader) {
currentList = inspectList;
}
if (line.startsWith('*') && currentList) {
const domain = line.substring(1).trim();
currentList.add(domain);
}
}
}
return {
warnList,
cautionList,
inspectList
};
} catch (error) {
console.error('Error fetching or parsing the page content:', error);
throw error;
}
}
async function fetchPreviousRevisionIds(revisionIds) {
const params = {
action: 'query',
prop: 'revisions',
revids: revisionIds.join('|'), // join all revision IDs
rvprop: 'ids',
format: 'json',
origin: '*'
};
try {
const data = await fetchFromActionAPI(params);
const pages = data.query.pages;
const revisionMap = {};
for (const pageId in pages) {
const revisions = pages[pageId].revisions;
if (revisions && revisions.length > 0) {
for (const revision of revisions) {
revisionMap[revision.revid] = revision.parentid;
}
}
}
return revisionMap;
} catch (error) {
console.error('Error fetching previous revision IDs:', error);
return {};
}
}
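// e.g. fetchPreviousRevisionIds(['12345']) resolves to { "12345": 12300 }
// (hypothetical IDs), mapping each revision ID to its parent revision ID.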
async function fetchFirstRevisionId(pageID) {
const params = {
action: 'query',
pageids: pageID,
prop: 'revisions',
rvlimit: 1,
rvdir: 'newer',
format: 'json',
origin: '*'
};
try {
const data = await fetchFromActionAPI(params);
const pages = data.query.pages;
const pageId = Object.keys(pages)[0];
const revisions = pages[pageId].revisions;
if (revisions && revisions.length > 0) {
return revisions[0].revid;
} else {
throw new Error('No revisions found for this page.');
}
} catch (error) {
console.error('Error fetching first revision ID:', error);
return null;
}
}
async function fetchDomainListPages(pageName) {
const cacheKey = `citationWatchlistFetchDomainListPages_${pageName}`;
const cacheExpiration = 4 * 60 * 60 * 1000; // 4 hours in milliseconds
const now = Date.now();
const cachedData = localStorage.getItem(cacheKey);
const cachedTimestamp = localStorage.getItem(`${cacheKey}_timestamp`);
if (cachedData && cachedTimestamp && (now - parseInt(cachedTimestamp, 10)) < cacheExpiration) {
console.log("Loaded list of lists from cache");
return JSON.parse(cachedData);
} else {
const params = {
action: 'query',
prop: 'revisions',
titles: pageName,
rvprop: 'content',
rvslots: '*',
format: 'json',
origin: '*'
};
try {
const data = await fetchFromActionAPI(params);
const page = data.query.pages;
const pageId = Object.keys(page)[0];
const content = page[pageId].revisions[0].slots.main['*'];
const pageTitles = [];
const lines = content.split('\n');
for (let line of lines) {
if (line.startsWith('* [[')) {
const match = line.match(/\[\[([^\]]+)\]\]/); // Matches the first instance of [[Page Title]]
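// e.g. "* [[Wikipedia:Citation Watchlist/Sample list]] – notes" yields
// "Wikipedia:Citation Watchlist/Sample list"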
if (match) {
pageTitles.push(match[1]);
}
}
}
localStorage.setItem(cacheKey, JSON.stringify(pageTitles));
localStorage.setItem(`${cacheKey}_timestamp`, now.toString());
console.log("Loaded from API and stored in cache");
return pageTitles;
} catch (error) {
console.error('Error fetching or parsing the page content:', error);
throw error;
}
}
}
async function runScript() {
publicSuffixSet = await fetchPublicSuffixList();
if (publicSuffixSet.size === 0) {
console.error('Public Suffix List loading failed');
return;
}
console.log("Welcome to Citation Watchlist");
const listPages = await fetchDomainListPages(listOfLists);
try {
const lists = await fetchAndOrganizeDomainLists(listPages);
lists.warnList.forEach(warnList.add, warnList);
lists.cautionList.forEach(cautionList.add, cautionList);
lists.inspectList.forEach(inspectList.add, inspectList);
} catch (error) {
console.error('Error fetching domain lists:', error);
}
const watchlistRevisions = await parseWatchlist();
await fetchDiffAndProcess(watchlistRevisions);
}
runScript().then(() => console.log('Citation Watchlist script finished executing'));