User:Neoconned/SourceWatchRefConverter
Jump to navigation
Jump to search
This has now been mothballed in favour of the SourceWatch:RefConverterBot project.
- This is a temporary home for the code. When it's a bit more finished, it will be integrated into MediaWiki:Monobook.js.
- This code is adapted and developed from Cyde Weys's ref converter: http://en.wikipedia.org/wiki/User:Cyde/Ref_converter. There are two purposes to the rewrite:
- Handle "traditional" SourceWatch style referencing. This consists of a plain numbered link in the body of the article, e.g. [1], and a corresponding citation in the External Links section. The vast majority of SW articles still use this referencing style. Very few SW articles use the note/ref templates (which are the only thing Cyde Weys's converter converts).
- Run in JavaScript rather than Perl. You'll therefore be able to run the converter from the Edit page when you edit an article.
- The rewrite will take a while. Don't expect results soon.
The code
// This program converts (on MediaWiki wikis):
//   * {{note}} and {{ref}} to <references/> style.
//   * Traditional SourceWatch style references to <references/> style.
//     (NOTE: the traditional-style conversion is not implemented yet; only
//     the {{ref}}/{{note}} family of templates is handled below.)
//
// Copyright (C) 2006 Ben "Cyde Weys" McIlwain
// Copyright (C) 2007 Neoconned (http://www.sourcewatch.org/index.php?title=User:Neoconned)
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
//
// Porting note: the original listing was a half-finished Perl -> JavaScript
// port (Perl statements inside JavaScript function shells).  The port is
// completed here.  Because the three functions used to rely on variables that
// were local to one another, they now share one explicit "state" object:
//
//   fullText       article source after multi-line {{cite}} normalisation
//   preLength      length of the article before any change was made
//   matches        lower-cased, de-duplicated, sorted ref names
//   matchesSingle  ref names used exactly once
//   matchesMult    ref names used more than once
//   refCorrs       map: ref name -> note text
//   finalText      converted article source
//   firstMatch     true until the first {{note}} line has been replaced
//   numErrors      count of suspicious {{ref}}/{{note}} constructs
//   warnings       newline-terminated warning messages
//   verbosage      newline-terminated progress messages
//   forceNames     give every <ref> a name, even singly used ones
//   smallFont      wrap <references /> in a "references-small" div
//   converted      true when at least one {{ref}} was found and processed
//   statusHtml     completion banner the Perl version used to print

/**
 * Backslash-escapes every regular expression metacharacter in `text`, so the
 * result matches `text` literally (the equivalent of Perl's \Q...\E).
 */
function swrcEscapeRegExp(text) {
    return String(text).replace(/[\\^$.*+?()[\]{}|\/-]/g, '\\$&');
}

/**
 * Builds the regular expression recognising one {{ref}}-family template:
 * {{ref|..}}, {{ref label|..}}, {{ref harv|..}}, {{ref harvard|..}},
 * {{ref num|..}} and {{mn|..}}, with any number of trailing parameters.
 *
 * @param {string} nameSource regex source matching the first parameter
 * @param {string} flags      RegExp flags
 */
function swrcRefPattern(nameSource, flags) {
    return new RegExp(
        '\\{\\{(?:mn|ref(?:[_ ]label|[_ ]harv|[_ ]harvard|[_ ]num)?)\\s*\\|\\s*' +
        nameSource +
        '\\s*(?:\\|[^|}]*?\\s*)*?\\}\\}',
        flags
    );
}

/**
 * Replaces {{ref}} templates named `name` inside `text` with `replacement`.
 * A replacer function is used so that "$&", "$1" etc. occurring in note text
 * are inserted literally instead of being expanded by String.replace.
 */
function swrcReplaceRef(text, name, replacement, flags) {
    return text.replace(swrcRefPattern(swrcEscapeRegExp(name), flags), function () {
        return replacement;
    });
}

/**
 * Cite.php returns an error if a ref name is an integer value, so purely
 * numeric names are padded out with a "ref" prefix.
 */
function swrcRefName(name) {
    return /^\d+$/.test(name) ? 'ref' + name : name;
}

/**
 * Creates the state object shared by the swrc* functions.
 *
 * @param {string} fullText article source
 * @param {Object} [options] forceNames / smallFont; each is enabled by the
 *                           value 'on' (as in the original form) or true
 */
function swrcNewState(fullText, options) {
    var opts = options || {};
    var text = String(fullText === undefined || fullText === null ? '' : fullText);
    return {
        fullText: text,
        preLength: text.length,
        matches: [],
        matchesSingle: [],
        matchesMult: [],
        refCorrs: Object.create(null),
        finalText: '',
        firstMatch: true,
        numErrors: 0,
        warnings: '',
        verbosage: '',
        forceNames: opts.forceNames === 'on' || opts.forceNames === true,
        smallFont: opts.smallFont === 'on' || opts.smallFont === true,
        converted: false,
        statusHtml: ''
    };
}

/**
 * Entry point: converts {{ref}}/{{note}} footnotes in `fullText` to
 * <ref>...</ref> / <references /> markup.
 *
 * @param {string} fullText  wiki source of the article
 * @param {Object} [options] see swrcNewState
 * @returns {Object|undefined} the state object described at the top of this
 *          listing (converted source in `finalText`), or undefined when the
 *          article contains {{mnb2}} and has to be repaired by hand.  When no
 *          {{ref}} is found, `converted` is false and `finalText` is the
 *          source with only the multi-line {{cite}} normalisation applied.
 */
function swrcMain(fullText, options) {
    var state = swrcNewState(fullText, options);
    var source = state.fullText;

    // Neoconned: Not really sure what this is about...
    // Get rid of the "How to add a footnote" comment that this script makes
    // superfluous.  (Still disabled; kept in its original Perl form.)
    //if ($fullText =~ m/\<\!\-\-[^\n]*add[^\n]*footnote.*?\-\-\>/s) {
    //    $fullText =~ s/\<\!\-\-[^\n]*add[^\n]*footnote.*?\-\-\>//s;
    //    $warnings .= "Deleting comment on how to add old footnotes, make sure this was done correctly.\n";
    //}

    // {{mnb2}} is incredibly broken
    if (source.indexOf('mnb2') !== -1) {
        // alert() only exists in a browser; skip it elsewhere.
        if (typeof alert === 'function') {
            alert("Panic, detecting {{mnb2}}, this article is most likely broken and will need manual repair.");
        }
        return;
    }

    // Citation templates that span several lines are joined onto one line,
    // because they must be inserted into the article text inline or things
    // will break.  This has the side-effect of changing citation templates
    // that aren't part of notes.  Oh well.
    var normalised = source.replace(/\{\{cite [^{}]*?\}\}/g, function (template) {
        return template.replace(/\n/g, '');
    });
    if (normalised !== source) {
        state.warnings += "Detecting multiple line cite, trying to fix, make sure I don't make any mistakes.\n";
    }
    state.fullText = normalised;

    // Collect the first parameter of every {{ref|...}}, {{ref label|...}},
    // {{ref harv|...}}, {{ref harvard|...}}, {{ref num|...}} and {{mn|...}}.
    var finder = swrcRefPattern('([^|]*?)', 'gi');
    var hit;
    while ((hit = finder.exec(normalised)) !== null) {
        state.matches.push(hit[1]);
    }

    // If there are no {{ref}}s in the article then there's no point in
    // continuing.
    if (state.matches.length > 0) {
        swrcArticleContainsSomeRefs(state);
        state.converted = true;
    } else {
        state.finalText = normalised;
    }
    return state;
}

/**
 * Performs the conversion once swrcMain has established that the article
 * contains at least one {{ref}}.
 *
 * @param {Object} [state] shared state; state.matches holds the raw list of
 *        ref names and state.fullText the article source.  When omitted, an
 *        empty state is created and the call does no useful work.
 * @returns {Object} the state, with finalText, warnings, verbosage and
 *          numErrors filled in
 */
function swrcArticleContainsSomeRefs(state) {
    if (!state) {
        state = swrcNewState('', null);
    }

    // Split the ref names into those used once and those used repeatedly.
    // If matches was [a,a,b,c,d,d,e], then afterwards:
    //   matches       = [a,b,c,d,e]
    //   matchesSingle = [b,c,e]
    //   matchesMult   = [a,d]
    // Names are compared case-insensitively, so they are stored lower-cased.
    var seen = Object.create(null);
    var repeated = Object.create(null);
    var i;
    for (i = 0; i < state.matches.length; i++) {
        var key = String(state.matches[i]).toLowerCase();
        if (key in seen) {
            repeated[key] = true;
        } else {
            seen[key] = true;
        }
    }
    state.matches = Object.keys(seen).sort();
    state.matchesMult = Object.keys(repeated).sort();
    state.matchesSingle = state.matches.filter(function (name) {
        return !(name in repeated);
    });

    if (state.matchesMult.length > 0) {
        state.warnings += "Detecting multiple refs with the same name, make sure I handle this correctly.\n";
    }

    // Fills state.refCorrs (ref name -> note text) and state.finalText (the
    // article with the {{note}} lines removed).
    swrcFindTheNotes(state.fullText, state);

    var name, hasNote, note, refName, openTag;

    // Go through and replace references that were only referenced once with
    // a simple <ref>.
    for (i = 0; i < state.matchesSingle.length; i++) {
        name = state.matchesSingle[i];
        hasNote = name in state.refCorrs;
        note = hasNote ? state.refCorrs[name] : '';
        if (hasNote && !/^\s*$/.test(note)) {
            openTag = state.forceNames ? '<ref name="' + swrcRefName(name) + '">' : '<ref>';
            state.finalText = swrcReplaceRef(state.finalText, name, openTag + note + '</ref>', 'gi');
            state.verbosage += 'Replacing ref "' + name + '" with full note: <ref>' + note + '</ref>\n';
        } else if (hasNote) {
            // Deal with blank notes.  We don't want to be inserting
            // <ref></ref> into the article.  (The Perl original tested the
            // ref name here instead of the note text, so this branch was
            // effectively unreachable.)
            state.numErrors++;
            state.warnings += 'Found a blank note, ref is "' + name + '"\n';
        } else {
            state.numErrors++;
            state.warnings += 'Ref "' + name + '" doesn\'t exist in notes. Turning into {{citation needed}}\n';
        }
    }

    // Now replace references that were referenced multiple times.  These
    // have to be named: the first occurrence carries the note text, every
    // later occurrence becomes an empty, named <ref />.
    for (i = 0; i < state.matchesMult.length; i++) {
        name = state.matchesMult[i];
        hasNote = name in state.refCorrs;
        note = hasNote ? state.refCorrs[name] : '';
        if (hasNote && !/^\s*$/.test(note)) {
            refName = swrcRefName(name);
            state.finalText = swrcReplaceRef(state.finalText, name,
                '<ref name="' + refName + '">' + note + '</ref>', 'i');
            state.finalText = swrcReplaceRef(state.finalText, name,
                '<ref name="' + refName + '" />', 'gi');
            state.verbosage += 'Replacing multiply referenced "' + refName +
                '" with full notes: <ref>' + note + '</ref>\n';
        } else if (hasNote) {
            // Blank note, see above.
            state.numErrors++;
            state.warnings += 'Found a blank multiply referenced note, ref is "' + name + '"\n';
        } else {
            state.numErrors++;
            state.warnings += 'Multiple reference "' + name + '" doesn\'t exist in notes. Turning into {{citation needed}}\n';
        }
    }

    // One more pass over any remaining {{ref}} tags to turn them into
    // {{citation needed}}.
    state.finalText = state.finalText.replace(swrcRefPattern('[^|]*?', 'gi'), '{{citation needed}}');

    // Remove excess blank lines that deleting the note lines may have left.
    if (/\n{4,}/.test(state.finalText)) {
        state.warnings += "I think I have found too many consecutive newlines, I am going to remove them, make sure I did this right.\n";
        state.finalText = state.finalText.replace(/\n{4,}/g, '\n\n');
    }

    // Final sanity checks
    if (/\{\{ref/i.test(state.finalText)) {
        state.warnings += "Failing sanity check, there may still be some {{ref}}s left.\n";
    }
    if (/\{\{note/i.test(state.finalText)) {
        state.warnings += "Failing sanity check, there may still be some {{note}}s left.\n";
    }
    if (/\{\{mn/i.test(state.finalText)) {
        state.warnings += "Failing sanity check, there may still be some Footnote4 stuff left ({{mn}} or {{mnb}}).\n";
    }

    // The Perl CGI version printed this banner; JavaScript has nowhere to
    // print it, so it is handed back to the caller instead.
    state.statusHtml = '<b>Finished</b>.<br>' + "\n";
    return state;
}

/*
swrcFindTheNotes function
*/

/**
 * Splits the wiki source into lines and checks each line for a {{note}},
 * {{note label}}, {{mnb}} or {{mnb2}} belonging to one of state.matches.
 *
 *   - A note that belongs to a known ref is recorded in state.refCorrs and
 *     its line is removed; the first such line is replaced by a single
 *     <references /> (unless the article already contains one).
 *   - A note with no corresponding ref is commented out, with a warning.
 *   - Every other line is copied through unchanged.
 *
 * The processed lines are appended to state.finalText, joined with "\n", so
 * the presence or absence of a trailing newline is the same as in the input.
 *
 * @param {string} fullText wiki source to scan
 * @param {Object} [state]  shared state; a fresh one (with no ref names) is
 *                          created when omitted
 * @returns {Object} the state
 */
function swrcFindTheNotes(fullText, state) {
    if (!state) {
        state = swrcNewState(fullText, null);
    }
    var source = String(fullText === undefined || fullText === null ? '' : fullText);
    var lines = source.split('\n');
    var output = [];
    var hasReferencesTag = /<references(\s*\/)?>/.test(source);

    // One pattern per ref name, compiled once rather than once per line.
    // The note text is captured with [^\n]* rather than ".": in JavaScript
    // "." does not match "\r", which would make the "$" anchor fail on
    // every line of a CRLF document.
    var notePatterns = state.matches.map(function (name) {
        return new RegExp(
            '\\{\\{(?:mnb2?|note(?:[_ ]label)?)\\s*\\|\\s*' +
            swrcEscapeRegExp(name) +
            '\\s*(?:\\|\\s*[^{}]*?\\s*)*\\}\\}\\s*([^\\n]*)$',
            'i'
        );
    });
    var deadNotePattern = /\{\{(?:mnb2?|note)\s*\|\s*([^|]*?)\s*\|?\s*\}\}\s*([^\n]*)$/i;

    for (var i = 0; i < lines.length; i++) {
        var thisLine = lines[i];
        var matched = false;

        // Loop through each of the ref names to see if it matches with any
        // notes on this line.  This has O(n*m) efficiency.
        for (var j = 0; j < notePatterns.length; j++) {
            var hit = notePatterns[j].exec(thisLine);
            if (hit === null) {
                continue;
            }
            // Drop any further {{note label}} on the line, then chop off
            // leading and trailing spaces.
            var thisMatch = hit[1]
                .replace(/\{\{note[_ ]label\s*[^}{]*?\}\}/gi, '')
                .replace(/^\s+/, '')
                .replace(/\s+$/, '');
            state.verbosage += 'Matching up ref "' + state.matches[j] +
                '", removing from list, note is: ' + thisMatch + '\n';
            state.refCorrs[state.matches[j]] = thisMatch;
            matched = true;

            // firstMatch keeps track of the first note that is replaced: it
            // becomes <references /> and the rest are just deleted.
            if (state.firstMatch) {
                if (!hasReferencesTag) {
                    output.push(state.smallFont
                        ? '<div class="references-small"><references /></div>'
                        : '<references />');
                }
                state.firstMatch = false;
            }
        }
        if (matched) {
            continue;
        }

        // If this line has a note with no corresponding ref, comment it out
        // and record a warning; otherwise keep the line as it is.
        var dead = deadNotePattern.exec(thisLine);
        if (dead !== null) {
            state.warnings += 'Note "' + dead[1] + '" isn\'t referenced, commenting out, link was: ' + dead[2] + '\n';
            state.numErrors++;
            output.push('<!-- Dead note "' + dead[1] + '": ' + dead[2] + ' -->');
        } else {
            output.push(thisLine);
        }
    }

    state.finalText += output.join('\n');
    return state;
}