/*
Geavaheaddji:Trondtr/MediaWiki:Gadget-page-authors-simple.js

Wikipedia'as/is

Fuomáš: Maŋŋel go almmuhat, soaitá leat dárbbašlaš sihkkut neahttalohkkii gaskaráju vai oainnat rievdadusaid. 

  • Firefox / Safari: Doala Shift dan botta go deattát Reload, dahje deaddil Ctrl-F5 dahje Ctrl-R (⌘-R Mac'as)
  • Google Chrome: Deaddil Ctrl-Shift-R (⌘-Shift-R Mac'as)
  • Internet Explorer / Edge: Doala Ctrl dan botta go deattát Álggat ođđasit, dahje deaddil Ctrl-F5
  • Opera: deaddil Ctrl-F5.
*/
/**
 * wgPageAuthors – estimates who wrote how much of the current page.
 *
 * Workflow: onclick() opens a message box and starts getRevisions(), which
 * pages through the revision history via the web API and stores, for every
 * revision, an FNV digest, a locality sensitive hash and a 256-bucket trigram
 * vector. setResult() then links revisions into a chain (skipping reverted
 * spans), sums the vector distances per user and renders an HTML summary
 * into wgPageAuthors.str.
 *
 * jQuery ($) and the helpers jsMsg, injectSpinner and removeSpinner are
 * expected to be provided globally by the host page.
 * NOTE(review): those three helpers look like legacy MediaWiki functions –
 * confirm they are still available where this script is installed.
 */
var wgPageAuthors = (function () {

	// selector of the message box that jsMsg() is expected to create
	var BOX = '#mw-js-message.mw-js-message-page-authors';

	// names of the continuation parameters merged into the query so far
	var continueKeys = [];

	// Read a configuration value from mw.config, falling back to the legacy
	// global variable of the same name when mw.config is not available.
	function getConfig(key) {
		if (typeof mw !== 'undefined' && mw.config && typeof mw.config.get === 'function') {
			return mw.config.get(key);
		}
		return (typeof window !== 'undefined') ? window[key] : undefined;
	}

	// Escape text for use in HTML content or in a quoted attribute value.
	function escapeHtml(text) {
		return String(text)
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;')
			.replace(/'/g, '&#39;');
	}

	// Build an HTML-safe link target for prefix + name from wgArticlePath.
	function pageUrl(prefix, name) {
		var target = prefix + encodeURIComponent(name);
		// a function replacement keeps '$' sequences in the target literal
		var url = String(getConfig('wgArticlePath')).replace('$1', function () { return target; });
		return escapeHtml(url);
	}

	// Show a status text in the progress element of the message box.
	function setProgress(html) {
		$(BOX + ' #progress').html(html);
	}

	// Comparator for ascending numeric sort.
	function ascending(a, b) {
		return a - b;
	}

	var self = {
		// request parameters; continuation parameters are merged in while paging
		query: { action: 'query', prop: 'revisions', titles: getConfig('wgPageName'), rvlimit: 10, rvprop: 'ids|timestamp|user|size|content', format: 'json' },

		// true from the first click until the result has been rendered
		inProgress: false,
		// true while the message box is supposed to be visible
		isOpen: false,

		// all revisions loaded so far, in the order the API delivered them
		revisions: [],

		// rendered HTML result, null until setResult() has completed
		str: null,

		/**
		 * Format a 32 bit integer as unsigned hexadecimal.
		 * @param {number} n signed or unsigned 32 bit integer
		 * @returns {string} lower case hex digits without leading zeros
		 */
		toHex: function (n) {
			if (n < 0) n = 0xFFFFFFFF + n + 1;
			return n.toString(16);
		},

		/**
		 * Fowler-Noll-Vo hash (multiply, then xor) over the UTF-16 code units.
		 * @param {string} str text to hash
		 * @returns {number} the digest as a signed 32 bit integer
		 */
		fnv: function (str) {
			var hash = 2166136261; // the 32 bit offset basis
			for (var i = 0; i < str.length; i++) {
				// the shifts add up to a multiplication by the prime 16777619
				hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
				hash = hash ^ str.charCodeAt(i);
			}
			return hash | 0;
		},

		perm: [ // permutations for the lsh algorithm
			1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51,
			87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65,
			49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28,
			12, 181, 103, 70, 22, 58, 75, 78, 183, 167, 238, 157, 124, 147, 172, 144,
			176, 161, 141, 86, 60, 66, 128, 83, 156, 241, 79, 46, 168, 198, 41, 254,
			178, 85, 253, 237, 250, 154, 133, 88, 35, 206, 95, 116, 252, 192, 54, 221,
			102, 218, 255, 240, 82, 106, 158, 201, 61, 3, 89, 9, 42, 155, 159, 93,
			166, 80, 50, 34, 175, 195, 100, 99, 26, 150, 16, 145, 4, 33, 8, 189,
			121, 64, 77, 72, 208, 245, 130, 122, 143, 55, 105, 134, 29, 164, 185, 194,
			193, 239, 101, 242, 5, 171, 126, 11, 74, 59, 137, 228, 108, 191, 232, 139,
			6, 24, 81, 20, 127, 17, 91, 92, 251, 151, 225, 207, 21, 98, 113, 112,
			84, 226, 18, 214, 199, 187, 13, 32, 94, 220, 224, 212, 247, 204, 196, 43,
			249, 236, 45, 244, 111, 182, 153, 136, 129, 90, 217, 202, 19, 165, 231, 71,
			230, 142, 96, 227, 62, 179, 246, 114, 162, 53, 160, 215, 205, 180, 47, 109,
			44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184,
			163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120, 209 ],

		/**
		 * Locality sensitive hash, loosely similar to nilsimsa: the trigram
		 * vector is folded into 32 buckets and every bucket contributes one
		 * bit, set when the bucket does not exceed the median bucket.
		 * @param {string} str text to hash
		 * @returns {number} 32 bit pattern as a signed integer
		 */
		lsh: function (str) {
			var arr = self.rvec(str);
			var i;
			// fold the 256 counters into the first 32 buckets
			for (i = 0; i <= 0x01f; i++) {
				arr[i] += arr[i+0x020] + arr[i+0x040] + arr[i+0x060] + arr[i+0x080] + arr[i+0x0a0] + arr[i+0x0c0] + arr[i+0x0e0];
			}
			// the threshold is taken over all 32 buckets (slice end is exclusive)
			var buckets = arr.slice(0, 0x020);
			var lim = buckets.slice().sort(ascending)[0x00f];
			var acc = 0;
			for (i = 0; i <= 0x01f; i++) {
				acc |= (((lim < buckets[i]) ? 0 : 1) << i);
			}
			return acc | 0;
		},

		/**
		 * Reduce a text to a vector of 256 counters: every trigram is run
		 * through a Pearson hash and the counter at that index is incremented.
		 * @param {string} str text to analyse
		 * @returns {number[]} array of 256 counts (all zero for fewer than 3 characters)
		 */
		rvec: function (str) {
			var arr = [];
			var i;
			for (i = 0; i <= 0x0ff; i++) {
				arr[i] = 0;
			}
			for (i = 0; i < str.length-2; i++) {
				// Pearson hash of the trigram starting at i
				var hash = 0;
				hash = self.perm[ (hash ^ str.charCodeAt(i)) & 0x0ff ];
				hash = self.perm[ (hash ^ str.charCodeAt(i+1)) & 0x0ff ];
				hash = self.perm[ (hash ^ str.charCodeAt(i+2)) & 0x0ff ];
				// accumulate at the index given by the hash
				arr[hash]++;
			}
			return arr;
		},

		/**
		 * Count the set bits of a 32 bit integer
		 * (Hacker's Delight, p. 66, Figure 5-2).
		 * @param {number} n signed or unsigned 32 bit integer
		 * @returns {number} number of set bits, 0..32
		 */
		beans: function (n) {
			n = n >>> 0; // treat the input as an unsigned 32 bit int
			n = n - ((n >>> 1) & 0x55555555);
			n = (n & 0x33333333) + ((n >>> 2) & 0x33333333);
			n = (n + (n >>> 4)) & 0x0F0F0F0F;
			n = n + (n >>> 8);
			n = n + (n >>> 16);
			return n & 0x0000003F;
		},

		/**
		 * Euclidean distance between two sparse vectors; despite the name
		 * this is a difference measure, not a cosine.
		 * @param {Object} vec1 map from key to number
		 * @param {Object} vec2 map from key to number
		 * @returns {number} square root of the summed squared differences
		 */
		cosine: function (vec1, vec2) {
			var combined = {};
			var x;
			for (x in vec1)
				combined[x] = vec1[x];
			for (x in vec2) {
				if (combined[x] == null) combined[x] = 0;
				combined[x] -= vec2[x];
			}
			var acc = 0;
			for (x in combined)
				acc += combined[x] * combined[x];
			return Math.sqrt(acc);
		},

		/**
		 * Euclidean distance between two 256-element trigram vectors.
		 * A missing vector is treated as all zeros.
		 * @param {number[]} [vec1]
		 * @param {number[]} [vec2]
		 * @returns {number}
		 */
		rcos: function (vec1, vec2) {
			var acc = 0;
			for (var i = 0; i <= 0x0ff; i++)
				acc += Math.pow((vec1 ? vec1[i] : 0) - (vec2 ? vec2[i] : 0), 2);
			return Math.sqrt(acc);
		},

		/**
		 * Click handler: the first click starts the analysis, later clicks
		 * toggle the visibility of the finished result.
		 */
		onclick: function () {
			if (self.str === null) {
				if (self.inProgress) {
					alert('Already in progress!');
					return;
				}
				self.inProgress = true;
				self.isOpen = true;
				jsMsg('<h3>Forfattarar</h3><small id="progress" />', 'page-authors' );
				$(BOX + ' h3').each(function(i, el) { injectSpinner( el, 'page-authors' ); });
				self.getRevisions();
				return;
			}
			if (!self.isOpen) {
				self.isOpen = true;
				jsMsg('<h3>Forfattarar</h3>' + self.str, 'page-authors' );
				$(BOX).show().find('ul').slideDown(1000);
			}
			else {
				self.isOpen = false;
				$(BOX).slideUp(1000);
			}
		},

		/**
		 * Load one batch of revisions, digest them and either request the
		 * next batch or hand over to setResult().
		 */
		getRevisions: function () {
			var request = $.getJSON(getConfig('wgScriptPath') + "/api.php", self.query, function(data) {
				var pages = (data && data.query && data.query.pages) || {};
				var page = pages[getConfig('wgArticleId')];
				var batch = (page && page.revisions) || [];
				for (var k = 0; k < batch.length; k++) {
					var rev = batch[k];
					var text = rev['*'];
					// revisions without readable text cannot be compared; leave them out
					if (typeof text !== 'string') continue;
					rev.lsh = self.lsh(text);
					rev.fnv = self.toHex(self.fnv(text));
					if (typeof rev.size === 'undefined') rev.size = text.length;
					rev.vec = self.rvec(text);
					rev['*'] = null; // conserve space
					self.revisions.push(rev);
				}

				// the API reports further batches either as 'continue' or,
				// in the older format, as 'query-continue'
				var next = null;
				if (data && data['continue']) {
					next = data['continue'];
				}
				else if (data && data['query-continue'] && data['query-continue'].revisions) {
					next = data['query-continue'].revisions;
				}

				if (next) {
					setProgress(self.revisions.length + ' revisions loaded&hellip;');
					for (var key in next) {
						if (Object.prototype.hasOwnProperty.call(next, key)) {
							self.query[key] = next[key];
							continueKeys.push(key);
						}
					}
					self.getRevisions();
				}
				else {
					self.setResult();
				}
			});
			if (request && typeof request.fail === 'function') {
				request.fail(function () {
					// forget partial data so that a later click starts from scratch
					self.revisions.length = 0;
					while (continueKeys.length) delete self.query[continueKeys.pop()];
					self.inProgress = false;
					setProgress('Could not load revisions.');
					if (typeof removeSpinner === 'function') removeSpinner( 'page-authors' );
				});
			}
		},

		/**
		 * Final step after all revisions are loaded: chain the revisions,
		 * attribute the distances to users and render the result.
		 */
		setResult: function () {
			setProgress('All revisions loaded&hellip;');

			var digests = {};
			var users = {};
			var revids = {};
			var revs = self.revisions;
			var i;

			// index the revisions by revid
			for (i = 0; i < revs.length; i++) revids[revs[i].revid] = revs[i];

			// walk from the last to the first list entry; a revision whose text
			// digest was seen before points back at that earlier occurrence
			setProgress('Calculate FNV-digests&hellip;');
			var shunted = 0;
			for (i = revs.length-1; 0<=i; i--) {
				if (digests[revs[i].fnv]) {
					revs[i].previousid = digests[revs[i].fnv].revid;
					shunted++;
				}
				digests[revs[i].fnv] = revs[i];
			}

			// cache the distance measure and set previousid to reflect the span
			setProgress('Calculate cosine measure&hellip;');
			i = 0;
			while (i < revs.length) {
				var j = i;
				var current = revs[j];
				// compare against the identical earlier revision if there is one,
				// otherwise against the parent – but only if it was actually loaded
				var base = null;
				if (current.previousid && revids[current.previousid]) base = revids[current.previousid];
				else if (current.parentid && revids[current.parentid]) base = revids[current.parentid];

				if (base) {
					current.cosine = self.rcos(current.vec, base.vec);
					// skip everything between this revision and its base
					while (i < revs.length && revs[i].revid > base.revid) i++;
					if (i === j) i++; // always make progress
				}
				else if (j+1 < revs.length) {
					current.cosine = self.rcos(current.vec, revs[j+1].vec);
					i++;
				}
				else {
					// last entry: measure against the empty vector and end the chain
					current.cosine = self.rcos(current.vec);
					current.previousid = 0;
					break;
				}
				// when the scan ran past the end the chain stops here
				current.previousid = (i < revs.length) ? revs[i].revid : 0;
			}

			// for large changes, look up to 16 steps back for a closer predecessor
			setProgress('Adjust cosine measure&hellip;');
			var similar = 0;
			var revision = revs[0];
			while (revision) {
				var previous = revids[revision.previousid];
				if (previous && 64 < revision.cosine) {
					var candidate = previous;
					var best = 2147483647;
					var keep = null;
					for (var step = 0; step < 16 && candidate; step++) {
						var distance = self.rcos(revision.vec, candidate.vec);
						if (best > distance) {
							best = distance;
							keep = candidate;
						}
						candidate = revids[candidate.previousid];
					}
					if (keep && keep != previous) {
						previous = keep;
						similar++;
					}
				}
				if (previous) {
					revision.previousid = previous.revid;
					revision.cosine = self.rcos(revision.vec, previous.vec);
				}
				revision = previous;
			}

			// accumulate the measure for each user along the chain
			setProgress('Accumulate cosine measure&hellip;');
			var num = 0;
			for (revision = revs[0]; revision; revision = revids[revision.previousid]) {
				if (!users[revision.user]) users[revision.user] = { cosine: 0 };
				users[revision.user].cosine += revision.cosine;
				num++;
			}
			var total = 0;
			var name;
			for (name in users) total += users[name].cosine;

			// share of the total in whole percent; 0 when there is nothing to share
			var percent = function (value) {
				return (0 < total) ? Math.round(100*value/total) : 0;
			};

			// sort out the main authors: the five largest contributors with a
			// non-zero share, anyone with at least 5%, and the user of the last list entry
			setProgress('Sort out main authors&hellip;');
			var creator = revs.length ? revs[revs.length-1].user : null;
			var authors = [];
			var pseudonyms = [];
			var count = 0;
			var k;
			for (name in users) pseudonyms.push(name);
			pseudonyms.sort(function(a, b){ return users[b].cosine - users[a].cosine; });
			for (k = 0; k < pseudonyms.length; k++) {
				name = pseudonyms[k];
				var share = percent(users[name].cosine);
				if (count++ < 5 && 0 < share) authors.push(name);
				else if (5 <= share ) authors.push(name);
				else if (creator == name) authors.push(name);
			}

			// render the list of authors and the explanatory footer
			setProgress('Create presentation&hellip;');
			var str = '<ul style="display:none">';
			var part = 0;
			for (k = 0; k < authors.length; k++) {
				name = authors[k];
				part += users[name].cosine;
				str += '<li>';
				str += '<a href="' + pageUrl('User:', name) + '">' + escapeHtml(name) + '</a>';
				str += ' (';
				str += '<a href="' + pageUrl('User_talk:', name) + '">diskusjon</a>';
				str += ' | ';
				str += '<a href="' + pageUrl('Special:Contributions/', name) + '">bidrag</a>';
				str += ') ';
				str += ' <em>';
				str += 'Har bearbeidet ' + percent(users[name].cosine) + '%' + ' av artikkelen';
				if (creator == name) str += ' og er artikkeloppretter';
				str += '.</em>';
				str += '</li>';
			}
			str += '</ul>';
			str += '<small><b>Forenkla berekning!</b> ';
			str += 'Bidrag frå andre forfattarar er ' + percent(total-part) + '%. ';
			str += 'Utelatte revisjonar og feiltolka bidrag kan gje avvik. ';
			str += 'Det er påvist ' + shunted + ' identiske revisjonar, og søkt ' + similar + ' gonger etter forutgåande liknande revisjonar på grunn av store endringar. Dette kan også gje forkasta mellomliggande revisjonar. ';
			str += 'Det er brukt ' + num + ' revisjonar av dei totalt ' + revs.length + ' som vart lasta, det vil seie at ' + (revs.length - num) + ' mellomliggande revisjonar vart forkasta i analysen.<br />';
			str += 'Se <a href="http://no.wikipedia.org/wiki/Hjelp:Forfattarar_av_sider">hjelpesida</a> for meir forklaring.';
			str += '</small>';
			self.str = str;
			$(BOX + ' #progress').remove();
			$(BOX + ' h3').each(function(i, el) { removeSpinner( 'page-authors' ); });
			$(BOX).append(str);
			$(BOX + ' ul').slideDown(1000);
			self.inProgress = false;
		}
	};

	return self;
}());

// Install the "Forfattarar" link in the p-cactions portlet.
// The link is only added on pages in non-negative, even numbered namespaces
// while wgIsArticle is set.
// The test (2147483647 <= -1 >>> 1) verifies that bitwise operators work on
// at least 32 bits, which the hash functions of wgPageAuthors rely on.
(function () {
	// without the mw object there is nothing to hook into; stay silent
	if (typeof mw === 'undefined' || !mw.config || !mw.loader) return;
	if (!(2147483647 <= -1 >>> 1)) return;

	var ns = mw.config.get('wgNamespaceNumber');
	if (!(0 <= ns && 0 == ns%2 && mw.config.get('wgIsArticle'))) return;

	// mw.util is only usable once the 'mediawiki.util' module has been loaded
	mw.loader.using('mediawiki.util', function () {
		$(function () {
			try {
				var link = mw.util.addPortletLink(
					"p-cactions",
					"#",
					"Forfattarar",
					"t-article-authors",
					"Identifiser forfattarar av sida"
				);
				// addPortletLink returns null when the portlet does not exist
				if (!link) return;
				$(link).on('click', function (event) {
					event.preventDefault();
					wgPageAuthors.onclick("page-authors");
				});
			}
			catch (e) {
				// the link is optional: report the problem but never break the page
				if (mw.log && typeof mw.log.warn === 'function') {
					mw.log.warn('page-authors: could not add portlet link', e);
				}
			}
		});
	});
}());