User:Ricordisamoa/ACimport.js: Difference between revisions

From Wikidata
Jump to navigation Jump to search
Content deleted Content added
Ricordisamoa (talk | contribs)
important bugfix
Ricordisamoa (talk | contribs)
another patch
Line 138: Line 138:
var entity=data.entities[Object.keys(data.entities)[0]];
var entity=data.entities[Object.keys(data.entities)[0]];
if((!entity.claims||!entity.claims["p345"])&&lang==="en"){
if((!entity.claims||!entity.claims["p345"])&&lang==="en"){
if(content.match(/\{\{[Ii]MDb title(\}\}|\|)/g).length>1){
console.warn("More than 1 occurrence of {{IMDb title}} found on "+pageTitle+"@"+lang+".wiki");
return;
}
var wpVal=content.match(/\{\{[Ii]MDb title\|([Ii]d=)?([0-9]+)(\}\}|\|)/);
var wpVal=content.match(/\{\{[Ii]MDb title\|([Ii]d=)?([0-9]+)(\}\}|\|)/);
if(wpVal!=null) queue.push({itemId:Object.keys(data.entities)[0],propName:"imdb",propValue:wpVal[2],lang:lang});
if(wpVal!=null) queue.push({itemId:Object.keys(data.entities)[0],propName:"imdb",propValue:wpVal[2],lang:lang});

Revision as of 12:55, 7 April 2013

/* <nowiki>
 *
 * ACimport.js by [[User:Ricordisamoa]]
 *
 * automatically imports Authority Control data and IMDb identifiers from the de, en, es, fr, it, ja, nl and ru Wikipedias to Wikidata
 *
 * ---> optimized to run on a bot <---
 * currently serving [[User:SamoaBot]]
 *
*/
(function(){
	// Claims waiting to be written to Wikidata; entries are {itemId, propName, propValue, lang}.
	var queue = [];

	// Template parameter name -> numeric ID of the Wikidata property it is stored in.
	// "imdb_id" and "imdb" are two spellings that resolve to the same property.
	var properties = {
		"VIAF": 214,
		"GND": 227,
		"LCCN": 244,
		"BNF": 268,
		"SUDOC": 269,
		"NDL": 349,
		"imdb_id": 345,
		"imdb": 345
	};

	// Wikipedia language code -> numeric ID of the Wikidata item for that Wikipedia,
	// used as the value of the p143 reference snak.
	var references = {
		"de": 48183,
		"en": 328,
		"fr": 8447,
		"it": 11920,
		"ja": 177837,
		"nl": 10000,
		"ru": 206855,
		"es": 8449
	};
	/**
	 * Returns an IMDb identifier that carries exactly one leading "tt".
	 *
	 * @param {string} IMDb - identifier with or without the "tt" prefix.
	 * @returns {string} the identifier with the "tt" prefix.
	 */
	var formatIMDb=function(IMDb){
		// Drop a prefix that is already there so it is not doubled when added back.
		var bareId=IMDb.replace(/^tt/,"");
		return "tt"+bareId;
	};
	/**
	 * Converts an LCCN as written in a Wikipedia template (e.g. "n/79/12345" or
	 * "n79-12345") into its compact form (e.g. "n79012345").
	 *
	 * The "/" and "-" separators are removed. If the first run of digits is then
	 * shorter than eight characters, zeros are inserted right after its first two
	 * digits until the run is eight digits long.
	 *
	 * @param {string} LCCN - the raw value.
	 * @returns {string} the compacted value; a value without any digit is returned
	 *   with only its separators removed.
	 */
	var formatLCCN=function(LCCN){
		// Hyphens must go too: left in place they split the digit run and the padding
		// below would be computed from the first part only.
		LCCN=LCCN.replace(/[\/\-]/g,"");
		var digitRun=LCCN.match(/[0-9]+/);
		// Nothing to pad (and nothing to index into) when there are no digits at all.
		if(digitRun===null) return LCCN;
		var numbers=digitRun[0].length;
		if(numbers<8){
			var zeros=Array(9-numbers).join("0");
			// A replacer function avoids "$1"+"0" being read as a "$10" group reference.
			LCCN=LCCN.replace(/[0-9]{2}/,function(firstTwo){
				return firstTwo+zeros;
			});
		}
		return LCCN;
	};
	/**
	 * Creates one string claim on a Wikidata item and then attaches a p143 reference
	 * pointing at the item of the Wikipedia the value was read from.
	 *
	 * Nothing is sent when propName is not a key of `properties`, or when propName is
	 * "imdb" and propValue is not purely numeric. Results and failures are only
	 * reported on the console.
	 *
	 * @param {string} itemId - ID of the entity to edit.
	 * @param {string} propName - key of `properties`.
	 * @param {string} propValue - raw value taken from the wikitext.
	 * @param {string} lang - language code of the source Wikipedia (key of `references`).
	 */
	var setProp=function(itemId,propName,propValue,lang){
		if(!Object.prototype.hasOwnProperty.call(properties,propName)) return;
		if(propName==="imdb"&&!/^[0-9]+$/.test(propValue)) return;
		var claimValue=propValue;
		if(propName==="LCCN") claimValue=formatLCCN(propValue);
		else if(propName.indexOf("imdb")!==-1) claimValue=formatIMDb(propValue);
		$.post(
			mw.util.wikiScript("api"),
			{
				format:"json",
				action:"wbcreateclaim",
				entity:itemId,
				snaktype:"value",
				// The API expects a JSON-encoded string; JSON.stringify also escapes
				// quotes and backslashes, which manual quoting would not.
				value:JSON.stringify(String(claimValue)),
				summary:"importing "+propName+" from "+lang+".wiki",
				property:"p"+properties[propName],
				bot:true,
				assert:"bot",
				token:mw.user.tokens.get("editToken")
			}
		)
		.done(function(data){
			if(data.error&&data.error.info){
				console.warn("Error: "+data.error.info);
				return;
			}
			if(!data.claim||!data.claim.id){
				// Without the new claim's ID there is nothing to attach a reference to.
				console.warn("Error: no claim returned for "+itemId+" ("+propName+")");
				return;
			}
			if(!Object.prototype.hasOwnProperty.call(references,lang)){
				// A snak without "numeric-id" would only be rejected by the API.
				console.warn("Error: no reference item known for "+lang+".wiki");
				return;
			}
			var snaks={
				"p143":[{
					snaktype:"value",
					property:"p143",
					datavalue:{
						type:"wikibase-entityid",
						value:{
							"entity-type":"item",
							"numeric-id":references[lang]
						}
					}
				}]
			};
			$.post(
				mw.util.wikiScript("api"),
				{
					format:"json",
					action:"wbsetreference",
					entity:itemId,
					statement:data.claim.id,
					snaks:JSON.stringify(snaks),
					summary:"importing reference from "+lang+".wiki",
					bot:true,
					assert:"bot",
					token:mw.user.tokens.get("editToken")
				}
			)
			.done(function(refData){
				if(refData.error&&refData.error.info) console.warn("Error: "+refData.error.info);
				else console.log(itemId+"  |  "+propName+"  |  "+propValue);
			})
			.fail(function(){
				console.warn("Error");
			});
		})
		.fail(function(){
			console.warn("Error");
		});
	};
	/**
	 * Works through the queue, submitting one entry to setProp() every 10 seconds.
	 *
	 * Entries are removed from the queue as they are submitted, so a later run never
	 * submits the same claim again, and entries pushed while the task is still
	 * running are picked up as well. The timer is stopped once the queue is empty.
	 */
	var startTask=function(){
		console.log(queue.length+" item"+(queue.length>1?"s":"")+" to be processed.");
		var interval=setInterval(function(){
			var next=queue.shift();
			if(next!==undefined){
				setProp(next.itemId,next.propName,next.propValue,next.lang);
				return;
			}
			// Stop ticking; otherwise the completion message would repeat every 10 seconds.
			clearInterval(interval);
			console.log("  --->  Task completed.");
		},10000);
	};
	/**
	 * Looks up the Wikidata item linked to one Wikipedia page and queues every
	 * identifier found in the page's wikitext that the item does not have yet.
	 *
	 * For "en" only {{IMDb title}} is read, and only when the item has no p345
	 * claim. For every other language each key of `properties` is searched for as
	 * a "| key = value" template parameter.
	 *
	 * @param {string} lang - Wikipedia language code; "<lang>wiki" is the site ID.
	 * @param {string} pageTitle - title of the page on that Wikipedia.
	 * @param {string} content - wikitext of the page.
	 * @param {boolean} start - when truthy, startTask() is called after this page
	 *   has been handled, whether or not the page contributed anything.
	 */
	var doPage=function(lang,pageTitle,content,start){
		// Fills the queue from one wbgetentities response. Kept as its own function
		// so that its early exits cannot skip the start check in the callback below.
		var collect=function(data){
			if(!data||!data.entities) return;
			var ids=Object.keys(data.entities);
			if(ids.length!==1) return;
			var itemId=ids[0];
			if(itemId==="-1"){
				console.warn("Entity not found: "+pageTitle+"@"+lang+".wiki");
				return;
			}
			var entity=data.entities[itemId];
			var wdProperties={};
			if((!entity.claims||!entity.claims["p345"])&&lang==="en"){
				// match() with the g flag yields null, not [], when nothing matches.
				var occurrences=content.match(/\{\{[Ii]MDb title(\}\}|\|)/g)||[];
				if(occurrences.length>1){
					console.warn("More than 1 occurrence of {{IMDb title}} found on "+pageTitle+"@"+lang+".wiki");
					return;
				}
				var imdbMatch=content.match(/\{\{[Ii]MDb title\|([Ii]d=)?([0-9]+)(\}\}|\|)/);
				if(imdbMatch!==null) queue.push({itemId:itemId,propName:"imdb",propValue:imdbMatch[2],lang:lang});
			}
			else if(entity.claims){
				$.each(properties,function(key,property){
					var claims=entity.claims["p"+property];
					if(!claims) return;
					// A snak may come without a datavalue; null still marks the
					// property as present so that no second claim gets queued.
					var datavalue=claims[0].mainsnak.datavalue;
					var wdVal=datavalue?datavalue.value:null;
					console.log("wd   "+key+": "+wdVal);
					wdProperties[key]=wdVal;
				});
			}
			if(lang==="en") return;
			$.each(properties,function(key){
				var wpMatch=content.match(new RegExp("\\| *"+key+" *= *([0-9a-zA-Z\\/\\-]+)[^0-9]"));
				if(wpMatch===null) return;
				var wpVal=wpMatch[1];
				console.log("wp   "+key+": "+wpVal);
				if(!Object.prototype.hasOwnProperty.call(wdProperties,key)){
					queue.push({itemId:itemId,propName:key,propValue:wpVal,lang:lang});
					console.log(" -->   "+key+" not present: added to queue");
					return;
				}
				console.log(" -->   "+key+" already present on Wikidata");
				var wdVal=wdProperties[key];
				if(wdVal===null||wdVal===wpVal) return;
				// Values that differ only in formatting are not conflicts.
				if(key==="LCCN"&&formatLCCN(wdVal)===formatLCCN(wpVal)) return;
				// Covers both the "imdb" and the "imdb_id" spelling.
				if(key.indexOf("imdb")!==-1&&formatIMDb(wdVal)===formatIMDb(wpVal)) return;
				console.warn("Conflict for "+key+"!");
			});
		};
		$.getJSON(
			mw.util.wikiScript("api"),
			{
				action:"wbgetentities",
				sites:lang+"wiki",
				titles:pageTitle,
				format:"json",
				props:"claims"
			},
			function(data){
				console.groupCollapsed(pageTitle);
				console.log(content);
				console.groupEnd();
				collect(data);
				if(start){
					console.log(queue);
					startTask();
				}
			}
		);
	};
	// Public entry points of the script.
	window.ACimport={};
	/**
	 * Fetches the wikitext of a single page through the JSONP API of the given
	 * Wikipedia and passes it to doPage() with start=true, so the queue is
	 * processed as soon as this page has been handled.
	 *
	 * @param {string} lang - Wikipedia language code, used as the subdomain.
	 * @param {string} pageTitle - title of the page to read.
	 */
	window.ACimport.fromPage=function(lang,pageTitle){
		$.get(
			"//"+lang+".wikipedia.org/w/api.php",
			{
				action:"query",
				format:"json",
				titles:pageTitle,
				prop:"revisions",
				rvprop:"content"
			},
			function(data){
				var pages=data&&data.query&&data.query.pages;
				var page=pages?pages[Object.keys(pages)[0]]:undefined;
				// A missing or invalid title comes back without "revisions".
				if(!page||!page.revisions||!page.revisions.length){
					console.warn("Page not found: "+pageTitle+"@"+lang+".wiki");
					return;
				}
				doPage(lang,pageTitle,page.revisions[0]["*"],true);
			},
			"jsonp"
		);
	};
	/**
	 * Processes one batch of main-namespace, non-redirect pages that embed the
	 * template configured for the given Wikipedia: every page is handed to
	 * doPage(), and the last one triggers the processing of the queue.
	 *
	 * The position reached is kept in jStorage under "ACimport-eicontinue" and is
	 * used as the starting point when eicontinue is not given.
	 *
	 * @param {string} lang - Wikipedia language code (a key of the template table below).
	 * @param {number} taskLength - number of pages to request (sent as geilimit).
	 * @param {string} [eicontinue] - last part of a previous continue key; overrides
	 *   the stored one.
	 */
	window.ACimport.startTask=function(lang,taskLength,eicontinue){
		var templates={
			de:"Normdaten",
			en:"Infobox film",//IMDb title","Authority control"
			fr:"Autorité",
			it:"Controllo di autorità",
			ja:"Normdaten",
			nl:"Infobox film",
			ru:"Фильм",
			es:"Ficha de película"
		};
		if(!Object.prototype.hasOwnProperty.call(templates,lang)){
			// Otherwise the query would silently run against "Template:undefined".
			console.warn("No template configured for "+lang+".wiki");
			return;
		}
		var temp=$.jStorage.get("ACimport-eicontinue");
		if(typeof eicontinue=="undefined"&&temp!=null) eicontinue=temp;
		$.get(
			"//"+lang+".wikipedia.org/w/api.php",
			$.extend(
				{
					action:"query",
					format:"json",
					generator:"embeddedin",
					geinamespace:0,
					geilimit:taskLength,
					geititle:"Template:"+templates[lang],
					geifilterredir:"nonredirects",
					prop:"revisions",
					rvprop:"content"
				},
				// The continue key is rebuilt as "10|<template>|<stored part>".
				typeof eicontinue!="undefined"?{geicontinue:"10|"+templates[lang]+"|"+eicontinue}:{}
			),
			function(data){
				if(data&&data.error){
					// Leave the stored position alone so the batch can be retried.
					console.warn("Error: "+data.error.info);
					return;
				}
				var cont=data&&data["query-continue"]&&data["query-continue"].embeddedin;
				if(cont&&cont.geicontinue){
					$.jStorage.set("ACimport-eicontinue",cont.geicontinue.split("|")[2]);
					console.log("Continue-key: "+cont.geicontinue);
				}
				else{
					// Final batch: there is nothing to continue from. Drop the stored
					// position so the next run does not repeat this batch forever.
					$.jStorage.deleteKey("ACimport-eicontinue");
				}
				var pages=$.grep(
					$.map((data&&data.query&&data.query.pages)||{},function(e){
						return e;
					}),
					function(e){
						return typeof e.revisions!="undefined";
					}
				);
				console.log(pages.length);
				$.each(pages,function(index,page){
					doPage(lang,page.title,page.revisions[0]["*"],index===pages.length-1);
				});
			},
			"jsonp"
		);
	};
})();