ਮੀਡੀਆਵਿਕੀ:Gadget-Fill Index.js

ਵਿਕੀਸਰੋਤ ਤੋਂ

Note: After publishing, you may have to bypass your browser's cache to see the changes.

  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
  • Internet Explorer / Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Press Ctrl-F5.
/*
 * Author: w:fr:Phe
 *
 * Import the contents of the "Book" template from Commons into the Index
 * page fields at Wikisource
 *
 * Modified: 2020-11-10:    More robust template handling to deal with Faebot
 *                          uploads (Inductiveload)
 *           2020-11-27:    Some simple heuristics to improve IA metadata
 *           2021-04-03:    Supports authors set with {{creator|wikidata=Qxxxx}}
 */

/* eslint-disable camelcase, one-var, vars-on-top */

( function ( mw, $, Promise ) {
	'use strict';

	// var FillIndex = {
	// };

	/**
	 * Parse the first occurrence of a template out of a piece of wikitext.
	 *
	 * Nested {{...}} and [[...]] constructs are tracked on a stack, so that a
	 * "|" or "=" inside them stays part of the enclosing parameter's value
	 * instead of being treated as a separator.
	 *
	 * @param {string} text     the wikitext to search
	 * @param {string} template the template name (matched literally, ignoring case)
	 * @return {Array} a two-element array:
	 *   - [ null, null ] if the template is not found in the text
	 *   - [ null, wikitext ] if the template is opened but never closed
	 *   - [ params, wikitext ] otherwise, where wikitext is the template's
	 *     source from "{{" to the matching "}}" and params maps parameter
	 *     names (trimmed, first letter upper-cased) to trimmed values.
	 *     Unnamed parameters are stored under 0, 1, 2...; entry 0 is the
	 *     template name itself.
	 */
	function parse_template( text, template ) {
		// the name is data, not a pattern: neutralise any regex metacharacters
		var escaped = template.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ),
			// find the start of the template in the wikitext
			re = new RegExp( '{{ *' + escaped + '[ \n]*\\|', 'i' ),
			start = text.search( re );

		// The template is not present
		if ( start < 0 ) {
			return [ null, null ];
		}

		var token_list = [
				[ '{{', '}}' ],
				[ '[[', ']]' ]
			],
			// openers of the constructs we are currently inside; the template
			// being parsed is the bottom entry
			stack = [],
			params = {},
			pos_param_idx = 0,
			param_name = '',
			param_content = '',
			found_equals = false,
			index = start;

		// store the parameter collected so far and reset for the next one
		function save_param() {
			var name = param_name.trim(),
				value = param_content.trim();

			if ( name.length === 0 ) {
				// positional parameter (pos=0 is the template name)
				params[ pos_param_idx ] = value;
				pos_param_idx += 1;
			} else {
				params[ name[ 0 ].toUpperCase() + name.slice( 1 ) ] = value;
			}

			param_name = '';
			param_content = '';
			found_equals = false;
		}

		while ( index < text.length ) {
			var pair = text.slice( index, index + 2 ),
				handled_token = false;

			for ( var i = 0; i < token_list.length; i += 1 ) {
				if ( pair === token_list[ i ][ 0 ] ) {
					// the opening braces of the template itself are not content
					if ( stack.length > 0 ) {
						param_content += pair;
					}
					stack.push( pair );
					handled_token = true;
					break;
				}

				// a closer only counts if it matches the innermost opener
				if ( pair === token_list[ i ][ 1 ] &&
						stack[ stack.length - 1 ] === token_list[ i ][ 0 ] ) {
					stack.pop();
					if ( stack.length > 0 ) {
						param_content += pair;
					}
					handled_token = true;
					break;
				}
			}

			if ( handled_token ) {
				index += pair.length;

				if ( stack.length === 0 ) {
					// end of template: the last parameter has no trailing "|"
					save_param();
					return [ params, text.slice( start, index ) ];
				}
				continue;
			}

			var ch = text[ index ];

			if ( stack.length === 1 && ch === '|' ) {
				// end of a template parameter, save it
				save_param();
			} else if ( stack.length === 1 && ch === '=' && !found_equals ) {
				// only the first top-level "=" separates name from value
				found_equals = true;
				param_name = param_content;
				param_content = '';
			} else {
				param_content += ch;
			}

			index += 1;
		}

		// ran out of text before the template was closed
		return [ null, text.slice( start, index ) ];
	}

	/*
	 * Converts text to title case.
	 *
	 * BOOK IV. THE INSTRUCTIONS OF I -> Book IV. The Instructions of I
	 *
	 * The text is lower-cased and split on single spaces, then each word is:
	 *   - fully upper-cased if it is a well-formed roman numeral, optionally
	 *     followed by one punctuation character ("iv", "iv.", "xii,")
	 *   - title-cased if it starts a sentence (first word, or the previous
	 *     word ended with a full stop)
	 *   - otherwise title-cased unless it is in a list of exceptions
	 *     like 'a', 'of'
	 *
	 * @param {string} str the text to convert
	 * @return {string} the converted text
	 */
	var toTitleCase = function ( str ) {

		// A syntactically valid roman numeral (so "civil" or "mild" do not
		// qualify), with at most one trailing non-word character. The
		// lookahead rejects the empty numeral.
		var roman_numeral = /^(?=[ivxlcdm])m{0,4}(?:cm|cd|d?c{0,3})(?:xc|xl|l?x{0,3})(?:ix|iv|v?i{0,3})\W?$/;

		var titler = function ( word ) {
			if ( word.length === 0 ) {
				return word;
			}

			return word[ 0 ].toUpperCase() + word.slice( 1 );
		};

		var all_capped = function ( word ) {
			// check for roman numerals (and "I"), maybe followed by punct
			return roman_numeral.test( word );
		};

		// if bookish title case, not all words are capped
		var no_cap_words = [ 'a', 'an', 'be', 'the', 'of', 'on', 'to', 'at', 'this', 'than',
			'then', 'by', 'and', 'for', 'with', 'in'
		];

		var words = str.toLowerCase().split( ' ' );

		var titled = [];

		var new_sentence = true;

		for ( var i = 0; i < words.length; i++ ) {

			if ( all_capped( words[ i ] ) ) {
				// some words are all caps always
				titled.push( words[ i ].toUpperCase() );

			} else if ( new_sentence || no_cap_words.indexOf( words[ i ] ) === -1 ) {
				// new sentences and most words get title casing
				titled.push( titler( words[ i ] ) );
			} else {
				// lower
				titled.push( words[ i ] );
			}

			// a trailing full stop means the next word opens a sentence
			new_sentence = words[ i ].search( /\.$/ ) !== -1;
		}

		return titled.join( ' ' );
	};

	// Lookup tables, both keyed by field index ('Title', 'Author', ...):
	//   extract_dict - the Book template parameter name(s) to read for a field
	//   field_names  - the suffix of the index form input ('wpprpindex-<name>')
	// They stay empty until setup_extract_dict() has run.
	var extract_dict = {},
		field_names = {};

	/**
	 * Load the lookup tables from the localisation data held in
	 * self.fill_index_data.
	 */
	function setup_extract_dict() {
		var localisation = self.fill_index_data;

		extract_dict = localisation.extract_dict;
		field_names = localisation.field_names;
	}

	/**
	 * Set the appropriate input field
	 *
	 * The value is tidied before being written: a space before any ";", ":"
	 * or "," is removed, exactly one space is left after it, and runs of
	 * spaces are collapsed. If the page has no input for the field, or the
	 * content Promise is rejected, the form is left untouched.
	 *
	 * @param {string} idx     the field index
	 * @param {string|Promise} content the new content, or a Promise that resolves it
	 */
	function set_field( idx, content ) {

		// this resolves with either the raw value, or the resolution of the Promise
		// eslint-disable-next-line compat/compat
		Promise.resolve( content ).then( function ( content_value ) {
			// fix the spacing around every separator, not just the first one
			content_value = content_value.replace( / ([;:,]) ?/g, '$1 ' );

			// fix any sneaky double spaces
			content_value = content_value.replace( / +/g, ' ' );

			var field_name = field_names[ idx ],
				f = document.getElementsByName( 'wpprpindex-' + field_name )[ 0 ];

			if ( f ) {
				f.value = content_value;
			}
		}, function ( error ) {
			// e.g. a failed lookup: report it rather than leave an unhandled rejection
			console.error( 'Could not work out the content of index field "' + idx + '"', error );
		} );
	}

	/**
	 * Look up the title of the page on this wiki that is linked to a
	 * Wikidata item.
	 *
	 * Sends a JSONP wbgetentities request for the item's sitelinks and picks
	 * the one keyed by this wiki's wgWikiID.
	 *
	 * @param {string} qid the Wikidata item ID (e.g. Q42)
	 * @return {Promise} resolves with the page title; rejects if the request
	 *   fails or the response holds no sitelink for this wiki
	 */
	function get_wd_author( qid ) {

		// eslint-disable-next-line compat/compat
		return new Promise( function ( resolve, reject ) {
			$.ajax( {
				url: '//wikidata.org/w/api.php',
				data: {
					format: 'json',
					action: 'wbgetentities',
					ids: qid,
					props: 'sitelinks'
				},
				dataType: 'jsonp',
				cache: true,
				success: function ( data ) {
					var wiki_id = mw.config.get( 'wgWikiID' ),
						entity = data && data.entities && data.entities[ qid ],
						sitelink = entity && entity.sitelinks && entity.sitelinks[ wiki_id ];

					// Settle the promise either way: an exception thrown from
					// this callback would leave it pending forever.
					if ( sitelink && sitelink.title ) {
						resolve( sitelink.title );
					} else {
						reject( new Error( 'No ' + wiki_id + ' sitelink found for ' + qid ) );
					}
				},
				error: function ( error ) {
					reject( error );
				}
			} );
		} );
	}

	/**
	 * Turn one author entry into a wikilink to the author's page.
	 *
	 * An entry containing a Wikidata ID (Q followed by digits) is resolved
	 * through get_wd_author(). Anything else is treated as a name and tidied:
	 * a leading list marker, life dates and expanded initials are stripped,
	 * and "Last, First" is reordered to "First Last".
	 *
	 * @param {string} str the raw author entry
	 * @return {Promise} resolves with the link wikitext for the author
	 */
	function process_author( str ) {
		str = str.replace( /^[*:][ ]*/, '' );
		str = str.trim();

		var author_promise,
			qid_match = str.match( /Q[0-9]+/ );

		if ( qid_match ) {

			// look up the ID itself, not whatever text surrounds it
			author_promise = get_wd_author( qid_match[ 0 ] );
		} else {

			// strip dates - these are nearly always not needed
			str = str.replace( /(?:, )?(?:(?:ca\.|fl\.) )?(\(?\d+-\d+\)?).?$/, '' );

			// strip birth date
			str = str.replace( /(?:, )(?:b\.|d\.) +\d{3,4}$/, '' );

			// strip initial expansions
			str = str.replace( /(?:[A-Z]. ?)+ \((.*)\)/, '$1' );

			str = str.replace( /, (Sir|Lord)$/, '' );

			// Last, First -> First Last
			str = str.replace( /^([^,]+), ([^,]+)$/, '$2 $1' );

			// Fix initials without dots
			str = str.replace( / ([A-Z]) /g, ' $1. ' );

			// just resolve right now
			// eslint-disable-next-line compat/compat
			author_promise = Promise.resolve( str );
		}

		return author_promise.then( function ( author ) {
			var ns_prefix = self.fill_index_data.ns_author_name + ':';

			// a title that already carries the author namespace (as a looked-up
			// page title may) must not be prefixed a second time
			if ( author.indexOf( ns_prefix ) === 0 ) {
				author = author.slice( ns_prefix.length );
			}

			// prevent the pipe trick triggering on the JS
			// eslint-disable-next-line no-useless-concat
			return '[' + '[' + ns_prefix + author + '|]]';
		} );
	}

	/**
	 * Convert the raw content of an author-type field into a list of links.
	 *
	 * Creator templates are first unwrapped to their payload (a name, or a
	 * Wikidata ID), each on a line of its own. Every non-blank line is then
	 * handed to process_author().
	 *
	 * @param {string} str the raw field content, one author per line
	 * @return {Promise} resolves with the processed authors joined by ", "
	 */
	function process_authors( str ) {
		// {{Creator:Some Name}}
		var named_creator = /{{[ ]*[Cc]reator[ ]*:[ ]*(.*?)[ ]*}}/g,
			// {{creator|wikidata=Qxxxx}}
			// TODO: fix wikidata here
			wikidata_creator = /{{[ ]*[Cc]reator[ ]*\|[ ]*[Ww]ikidata[ ]*=[ ]*(Q[0-9]*)}}/g,
			entries = str
				.replace( named_creator, '$1\n' )
				.replace( wikidata_creator, '$1\n' )
				.split( '\n' ),
			pending = [];

		for ( var n = 0; n < entries.length; n += 1 ) {
			// skip blank lines
			if ( entries[ n ].trim() ) {
				pending.push( process_author( entries[ n ] ) );
			}
		}

		// eslint-disable-next-line compat/compat
		return Promise.all( pending ).then( function ( links ) {
			return links.join( ', ' );
		} );
	}

	/**
	 * Split a publisher string that may also hold the place of publication.
	 *
	 * With a colon present, the text before the first colon is the city and
	 * the rest is the publisher. Otherwise the string is cut at ", ", "; "
	 * or ": " and the first piece is taken as the city only if it matches
	 * one of a short list of common publishing cities.
	 *
	 * @param {string} str the raw publisher field
	 * @return {string[]} [ publisher, city ], both trimmed; city is '' when
	 *   none was recognised
	 */
	function split_city_publisher( str ) {
		// most books are published in a few cities
		var known_cities = [ /London/, /Edinburgh/, /Oxford/, /Cambridge/,
				/New York/, /Boston/, /Philadelphia/, /Washington D. ?C./,
				/Paris/,
				/Berlin/, /Stuttgart/, /Jena/,
				/Hong Kong/, /Shanghai/,
				/Calcutta/, /Bombay/, /Delhi/ ],
			pieces,
			lead,
			lead_is_city;

		if ( str.indexOf( ':' ) !== -1 ) {
			// a colon: assume this is a city: publisher
			pieces = str.split( ':' );
			return [ pieces.slice( 1 ).join( ':' ).trim(), pieces[ 0 ].trim() ];
		}

		pieces = str.split( /[,;:] / );
		lead = pieces[ 0 ];

		lead_is_city = pieces.length > 1 && known_cities.some( function ( city_re ) {
			return city_re.test( lead );
		} );

		if ( lead_is_city ) {
			return [ pieces.slice( 1 ).join( ', ' ).trim(), lead.trim() ];
		}

		// nothing recognisable: it is all publisher
		return [ str.trim(), '' ];
	}

	/**
	 * Split a volume field into the volume itself and an optional description.
	 *
	 * A trailing "(Foo)" or ": Foo" is taken off as the description. A volume
	 * that is just a number (optionally prefixed "vol." or "v.") is expanded
	 * to "Volume <number>".
	 *
	 * @param {string} v the raw volume field
	 * @return {string[]} [ volume, description ]; description is '' if absent
	 */
	function processVolume( v ) {

		// first, strip off either : Foo or (Foo):
		var match = v.match( /(.*?) *(?:\((.*)\)|: *(.*))?$/ );

		var vol = v;
		var v_desc = '';
		if ( match ) {
			vol = match[ 1 ];
			// group 2 is the "(Foo)" form, group 3 the ": Foo" form
			v_desc = match[ 2 ] || match[ 3 ] || '';
		}

		// Add "Volume " if it looks like we need it
		vol = vol.replace( /^(?:(?:vol|v)\. ?)?([-0-9]+)$/i, 'Volume $1' );

		return [ vol, v_desc ];
	}

	/**
	 * Unwrap the first {{City|...}} template in a city field, leaving its
	 * first-parameter text in place of the template.
	 *
	 * @param {string} c the raw city field
	 * @return {string} the field with the template replaced by its content
	 */
	function processCity( c ) {
		var city_template = /\{\{ *City *\| *(.*?) *\}\}/i;

		return c.replace( city_template, '$1' );
	}

	/**
	 * Fill in the index form from the result of a revisions query.
	 *
	 * For every existing page in the response, the Book template is parsed
	 * out of the page text and each field listed in extract_dict is copied
	 * to the form, with field-specific clean-up. The 'Source' field is set
	 * from the file extension in wgTitle, and a sort key is set for titles
	 * starting with an article.
	 *
	 * @param {Object} data the API response; data.query.pages maps page IDs
	 *   to pages, each holding its text in revisions[ 0 ][ '*' ]
	 * @return {boolean} true if at least one existing page was processed
	 */
	function extract_content( data ) {
		var importationDone = false;
		// until Object entries is allowed
		// eslint-disable-next-line no-jquery/no-each-util
		$.each( data.query.pages, function ( ids, page ) {
			// negative IDs are skipped (presumably the API's placeholder for a missing page)
			if ( ids < 0 ) {
				return;
			}

			var content = page.revisions[ 0 ][ '*' ],
				temp_parsed = parse_template( content, 'Book' );

			if ( temp_parsed[ 0 ] === null ) {
				console.error( 'Failed to parse Book template' );
			} else {

				var title = temp_parsed[ 0 ][ extract_dict.Title ];

				if ( title ) {
					title = toTitleCase( title );
					set_field( 'Title', "''[[" + title + "]]''" );
				}

				for ( var idx in extract_dict ) {

					var template_content = '';

					if ( typeof extract_dict[ idx ] === 'string' ) {
						template_content = temp_parsed[ 0 ][ extract_dict[ idx ] ];
					} else {
						// find the first matching parameter
						for ( var i = 0; i < extract_dict[ idx ].length; i++ ) {

							template_content = temp_parsed[ 0 ][ extract_dict[ idx ][ i ] ];

							if ( template_content !== undefined && template_content.length > 0 ) {
								break;
							}
						}
					}

					if ( template_content !== undefined && template_content.length > 0 ) {
						switch ( idx ) {
							case 'Editor':
							case 'Author':
							case 'Translator':
							case 'Illustrator':
								set_field( idx, process_authors( template_content ) );
								break;
							case 'Publisher':
							// it is very common for the Commons publisher field
							// to contain the location
								var pub_city = split_city_publisher( template_content );

								if ( pub_city[ 1 ].length > 0 ) {
									set_field( 'Publisher', pub_city[ 0 ] );
									set_field( 'City', pub_city[ 1 ] );
								} else {
									set_field( 'Publisher', pub_city[ 0 ] );
								}
								break;
							case 'Volume':

								var v = processVolume( template_content );

								var v_field;
								if ( title !== undefined ) {
									// link the volume as a subpage of the work
									v_field = '[[' + title +
									'/' + v[ 0 ] + '|' + v[ 0 ] + ']]';
								} else {
									// fallback
									v_field = v[ 0 ];
								}

								if ( v[ 1 ] ) {
									v_field += ' (' + v[ 1 ] + ')';
								}
								set_field( idx, v_field );
								break;
							case 'Title':
								// already handled above
								break;
							case 'City':
								set_field( idx, processCity( template_content ) );
								break;
							default:
								set_field( idx, template_content );
						}
					}
				}
			}

			// set the file type selector
			set_field( 'Source', mw.config.get( 'wgTitle' ).split( '.' ).slice( -1 )[ 0 ] );

			// set the sort key - only possible if the template was parsed
			if ( temp_parsed[ 0 ] !== null ) {
				var skTitle = temp_parsed[ 0 ][ extract_dict.Title ];
				if ( skTitle !== undefined ) {
					var titlewords = skTitle.split( ' ' );
					if ( [ 'The', 'A', 'An', 'Of' ].indexOf( titlewords[ 0 ] ) >= 0 ) {
						// The Foo Bar -> Foo Bar, The
						skTitle = titlewords.slice( 1 ).join( ' ' ) + ', ' + titlewords[ 0 ];
						skTitle = skTitle[ 0 ].toUpperCase() + skTitle.slice( 1 );
						set_field( 'Key', skTitle );
					}
				}
			}
			importationDone = true;
		} );

		return importationDone;
	}

	/**
	 * Handle the response for the Commons file page: fill the form from it
	 * and, if extract_content() reports that nothing was processed, repeat
	 * the query against this wiki's own API.
	 *
	 * @param {Object} data the API response of the first query
	 */
	function common_content( data ) {
		if ( extract_content( data ) ) {
			// the first response was enough
			return;
		}

		var api_url = mw.util.wikiScript( 'api' ),
			local_query = {
				format: 'json',
				action: 'query',
				prop: 'revisions',
				rvprop: 'content',
				titles: 'File:' + mw.config.get( 'wgTitle' )
			};

		$.ajax( { url: api_url, data: local_query } ).done( extract_content );
	}

	/**
	 * Start an import: request the text of the file page named after the
	 * current page (wgTitle) from the Wikimedia Commons API over JSONP, and
	 * pass the response to common_content().
	 */
	function do_extraction() {
		var file_query = {
				format: 'json',
				action: 'query',
				prop: 'revisions',
				rvprop: 'content',
				titles: 'File:' + mw.config.get( 'wgTitle' )
			},
			request = $.ajax( {
				url: '//commons.wikimedia.org/w/api.php',
				data: file_query,
				dataType: 'jsonp'
			} );

		request.done( common_content );
	}

	/**
	 * Entry point once the page is known to be an index page being edited.
	 *
	 * Loads the lookup tables, then either imports straight away (when the
	 * page shows a .mw-newarticletext element - presumably the notice shown
	 * while creating a page) or adds a toolbox link that runs the import on
	 * demand.
	 */
	function setup() {
		setup_extract_dict();

		// eslint-disable-next-line no-jquery/no-global-selector
		var creating_page = $( '.mw-newarticletext' ).length !== 0;

		if ( creating_page ) {
			do_extraction();
			return;
		}

		// Portlet link to re-extract
		var refill_link = mw.util.addPortletLink(
			'p-tb',
			'#',
			'Re-fill index',
			't-refill-index',
			'Re-import this index page\'s data from the Commons file'
		);

		$( refill_link ).on( 'click', function ( event ) {
			// the link target is only a placeholder
			event.preventDefault();
			do_extraction();
		} );
	}

	/*
	 * Localisation section: define self.fill_index_data (or only some of its
	 * members) before this script is loaded to change the script's behaviour.
	 * Anything that is missing gets the default below.
	 */
	var fill_data = self.fill_index_data;

	if ( !fill_data ) {
		fill_data = {};
		self.fill_index_data = fill_data;
	}

	// name of the namespace that author links point into
	if ( !fill_data.ns_author_name ) {
		fill_data.ns_author_name = 'Author';
	}

	// Commons Book template field names
	// Should not need to be internationalised
	// NOTE(review): the values below are Punjabi although the note above says
	// they should not be internationalised - confirm that they match the
	// parameter names used by the Book templates actually being parsed.
	if ( !fill_data.extract_dict ) {
		fill_data.extract_dict = {
			Editor: 'ਸੰਪਾਦਕ',
			Publisher: 'ਪ੍ਰਕਾਸ਼ਕ',
			Author: 'ਲੇਖਕ',
			Translator: 'ਅਨੁਵਾਦਕ',
			Volume: 'ਭਾਗ',
			Illustrator: 'ਚਿੱਤਰਕਾਰ',
			'Image page': 'ਤਸਵੀਰ ਸਫ਼ਾ',
			Title: 'ਸਿਰਲੇਖ',
			Date: [ 'ਪ੍ਰਕਾਸ਼ਨ ਮਿਤੀ', 'ਸਾਲ' ],
			City: 'ਸ਼ਹਿਰ',
			Source: 'ਸਰੋਤ',
			LCCN: 'LCCN',
			OCLC: 'OCLC'
		};
	}

	// Proofread page field names
	if ( !fill_data.field_names ) {
		fill_data.field_names = {
			Editor: 'ਸੰਪਾਦਕ',
			Publisher: 'ਪ੍ਰਕਾਸ਼ਕ',
			Author: 'ਲੇਖਕ',
			Translator: 'ਅਨੁਵਾਦਕ',
			Volume: 'ਭਾਗ',
			Illustrator: 'ਚਿੱਤਰਕਾਰ',
			'Image page': 'ਤਸਵੀਰ',
			Title: 'ਸਿਰਲੇਖ',
			Date: 'ਸਾਲ',
			City: 'ਪਤਾ',
			Source: 'ਸਰੋਤ',
			LCCN: 'LCCN',
			OCLC: 'OCLC',
			Key: 'Key'
		};
	}
	/* end of localisation section */

	// Once the DOM is ready, start up - but only when editing an index page
	$( function () {
		var on_index_page = mw.config.get( 'wgCanonicalNamespace' ) === 'Index';

		if ( on_index_page && mw.config.get( 'wgAction' ) === 'edit' ) {
			setup();
		}
	} );
// eslint-disable-next-line no-undef
}( mediaWiki, jQuery, Promise ) );