Utente:IagaBot/user-fixes.py
Da Wikipedia, l'enciclopedia libera.
Questa pagina è solo una copia di QUESTA che tengo qui per comodità :D
# -*- coding: utf-8 -*-
#
# This code can be placed in user-fixes.py (create the file if it does not
# exist).
#
# Example command:
#
# replace.py -namespace:0 -xml:itwiki-20080418-pages-meta-current.xml -fix:errori_comuni
#
# Layout of every entry in ``fixes``:
#   'regex'        - the 'replacements' patterns are regular expressions
#   'recursive'    - flag copied unchanged from the original configuration
#   'msg'          - edit summary, keyed by language code
#   'replacements' - ordered list of (pattern, replacement) pairs
#   'exceptions'   - places / pages where the replacements must not be applied
#
# All patterns and replacements are written as u'...' literals with doubled
# backslashes instead of ur'...': the string values are the same, but the
# ``ur`` prefix is a syntax error on Python 3.

# Edit summary shared by the 'errori_comuni' and 'tutti' fixes.
_ERRORI_COMUNI_MSG = (
    u'Bot: Correzione di uno o più '
    u'[[Utente:IagaBot/Errori_comuni|errori comuni]]'
)

# Punctuation spacing and common Italian misspellings.  The order is kept
# exactly as in the original list.
_ERRORI_COMUNI_REPLACEMENTS = [
    # --- spacing around punctuation ---
    (u'([a-z]|[ìèéùòà\\)]) ?\\.([A-Z]|È)', u'\\1. \\2'),
    (u'([a-z]|[ìèéùòà\\)]) ?,([a-z]|")', u'\\1, \\2'),
    (u'([a-z]|[ìèéùòà\\)]) ?(:|;)(\\w|")', u'\\1\\2 \\3'),
    (u'(\\w|[ìèéùòà\\)]) (,|\\.|:|;)', u'\\1\\2'),
    # --- apostrophe / accent slips ---
    (u'(\\b|\\.)E\' ', u'\\1È '),
    (u'\\b([Uu])n\'amico\\b', u'\\1n amico'),
    # --- misspelled words, alphabetical by first letter ---
    (u'\\b(\\w+)zzion(\\w+)\\b', u'\\1zion\\2'),
    (u'\\b([aA])(bben|ccioc)chè\\b', u'\\1\\2ché'),
    (u'\\b([aA])(ffin|ncor|nzi|tteso)chè\\b', u'\\1\\2ché'),
    (u'\\b([aA])ccellera(re|zione)\\b', u'\\1ccelera\\2'),
    (u'\\b([aA])e?reoport(o|i)\\b', u'\\1eroport\\2'),
    (u'\\b([aA])ggiottaggio\\b', u'\\1ggiotaggio'),
    (u'\\b([aA])l(cun|lor|tro)chè\\b', u'\\1l\\2ché'),
    (u'\\b([aA])ltretanto\\b', u'\\1ltrettanto'),
    (u'\\b([aA])ppropiat(a|e|i|o)\\b', u'\\1ppropriat\\2'),
    (u'\\b([aA])pprovigionamento\\b', u'\\1pprovvigionamento'),
    (u'\\b([aA])quistare\\b', u'\\1cquistare'),
    (u'\\b([aA])vve(gna|gnadio|nga|ngadio)chè\\b', u'\\1vve\\2ché'),
    (u'\\b([bB])enchè\\b', u'\\1enché'),
    (u'\\b([cC])(hec|ioc|omec|onciofosse|ontutto|osic|otal)chè\\b', u'\\1\\2ché'),
    (u'\\b([cC])osidett(o|i|e|a)\\b', u'\\1osiddett\\2'),
    (u'\\b([cC])ospiqu(o|i|e|a)\\b', u'\\1ospicu\\2'),
    (u'\\b([cC]om|)([pP])ropi(o|età|etari|etari[aeo])\\b', u'\\1\\2ropri\\3'),
    (u'\\b([dD])(ac|appoi|imodo|opo|opodi)chè\\b', u'\\1\\2ché'),
    (u'\\b([dD]e|[cC]oef)ficen(za|te|ti)\\b', u'\\1ficien\\2'),
    (u'\\b([eE])ssendochè\\b', u'\\1ssendoché'),
    (u'\\b([eE])vaqua(re|[t][oiae]|zione)\\b', u'\\1vacua\\2'),
    (u'\\b([fF])(inattanto|intanto|inac|inattanto|in|uor)chè\\b', u'\\1\\2ché'),
    (u'\\b([gG])(iac|ran|iafosse|iafossecosa)chè\\b', u'\\1\\2ché'),
    (u'\\b([iI])(nfinattanto|nquanto)chè\\b', u'\\1\\2ché'),
    (u'\\b([iI])gen(e|ic[oai]|iche)\\b', u'\\1gien\\2'),
    (u'\\b([iI])ngenier(e|i)\\b', u'\\1ngegner\\2'),
    (u'\\b([iI])nnoqu(i|o|a|e)\\b', u'\\1nnocu\\2'),
    (u'\\b([iI])nzio\\b', u'\\1nizio'),
    (u'\\b([iI]l|)([lL])eggittim(o|i|e|a)\\b', u'\\1\\2egittim\\3'),
    (u'\\b([iI]n|)([cC])oscen(za|te|ti)\\b', u'\\1\\2oscien\\3'),
    (u'\\b([iI]n|)([sS])ufficen(za|te|ti)\\b', u'\\1\\2ufficien\\3'),
    (u'\\b([lL])orchè\\b', u'\\1orché'),
    (u'\\b([mM])acchè\\b', u'\\1acché'),
    (u'\\b([mM])etereologi(a|co|ci|che)\\b', u'\\1eteorologi\\2'),
    (u'\\b([nN])on(so|)chè\\b', u'\\1on\\2ché'),
    (u'\\b([oO])(ltre|nde)chè\\b', u'\\1\\2ché'),
    (u'\\b([oO])nniscen(za|te|ti)\\b', u'\\1nniscien\\2'),
    (u'\\b([oO])noreficenza\\b', u'\\1norificenza'),
    (u'\\b([oO])vverossia\\b', u'\\1vverosia'),
    (u'\\b([pP])(oi|oscia|resso|ur)chè\\b', u'\\1\\2ché'),
    (u'\\b([pP])aralello\\b', u'\\1arallelo'),
    (u'\\b([pP])er(cioc|lo|oc|)chè\\b', u'\\1er\\2ché'),
    (u'\\b([pP])iú\\b', u'\\1iù'),
    (u'\\b([pP])rospicent(e|i)\\b', u'\\1rospicient\\2'),
    (u'\\b([pP])roveniendo\\b', u'\\1rovenendo'),
    (u'\\b([pP]rofi|[pP]romis)qu(o|a|e|i)\\b', u'\\1cu\\2'),
    (u'\\b([qQ])ua(nto|si)chè\\b', u'\\1ua\\2ché'),
    (u'\\b([rR]i|)([cC])onoscien(za|te|ti)\\b', u'\\1\\2onoscen\\3'),
    (u'\\b([sS])(econdo|ennon|enon|tante)chè\\b', u'\\1\\2ché'),
    (u'\\b([sS])cenz([ae])\\b', u'\\1cienz\\2'),
    # The accented vowel must be captured so that \2 exists, and the trailing
    # word boundary must be an escaped \\b: a bare \b in a non-raw string is
    # a backspace character, which made this rule impossible to match.
    (u'\\b([sS])enonch([èé])\\b', u'\\1ennonch\\2'),
    (u'\\b([sS])i(c|nattanto|n|ntanto)chè\\b', u'\\1i\\2ché'),
    (u'\\b([sS])oprattuto\\b', u'\\1oprattutto'),
    (u'\\b([sS])uperfice\\b', u'\\1uperficie'),
    (u'\\b([tT])(al|almente|anto|ranne|utto)chè\\b', u'\\1\\2ché'),
    (u'\\b([tT])errittorio\\b', u'\\1erritorio'),
]

# Taken from fixes.py: HTML-to-wikitext conversions and bracket repairs.
_FIXES_PY_REPLACEMENTS = [
    (r'(?i)<b>(.*?)</b>', r"'''\1'''"),
    (r'(?i)<strong>(.*?)</strong>', r"'''\1'''"),
    (r'(?i)<i>(.*?)</i>', r"''\1''"),
    (r'(?i)<em>(.*?)</em>', r"''\1''"),
    (r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2'),
    (r'(?i)<hr ([^>/]+?)>', r'<hr \1 />'),
    (r'(?i)([\r\n]) *<h1> *([^<]+?) *</h1> *([\r\n])', r"\1= \2 =\3"),
    (r'(?i)([\r\n]) *<h2> *([^<]+?) *</h2> *([\r\n])', r"\1== \2 ==\3"),
    (r'(?i)([\r\n]) *<h3> *([^<]+?) *</h3> *([\r\n])', r"\1=== \2 ===\3"),
    (r'(?i)([\r\n]) *<h4> *([^<]+?) *</h4> *([\r\n])', r"\1==== \2 ====\3"),
    (r'(?i)([\r\n]) *<h5> *([^<]+?) *</h5> *([\r\n])', r"\1===== \2 =====\3"),
    (r'(?i)([\r\n]) *<h6> *([^<]+?) *</h6> *([\r\n])', r"\1====== \2 ======\3"),
    # external link in double brackets
    (r'\[\[(?P<url>https?://[^\]]+?)\]\]', r'[\g<url>]'),
    # external link starting with double bracket
    (r'\[\[(?P<url>https?://.+?)\]', r'[\g<url>]'),
    # external link with forgotten closing bracket (rule disabled in the
    # original list; kept here for reference only)
    #(r'\[(?P<url>https?://[^\]\s]+)\r\n', r'[\g<url>]\r\n'),
    # external link ending with double bracket.
    # do not change weblinks that contain wiki links inside
    # inside the description
    (r'\[(?P<url>https?://[^\[\]]+?)\]\](?!\])', r'[\g<url>]'),
    # wiki link closed by single bracket.
    # ATTENTION: There are some false positives, for example
    # Brainfuck code examples or MS-DOS parameter instructions.
    # There are also sometimes better ways to fix it than
    # just putting an additional ] after the link.
    (r'\[\[([^\[\]]+?)\](?!\])', r'[[\1]]'),
    # wiki link opened by single bracket.
    # ATTENTION: same as above.
    (r'(?<!\[)\[([^\[\]]+?)\]\](?!\])', r'[[\1]]'),
    # template closed by single bracket
    # ATTENTION: There are some false positives, especially in
    # mathematical context or program code.
    (r'{{([^{}]+?)}(?!})', r'{{\1}}'),
]

# Tag names listed under the 'inside-tags' exception key.
_EXCEPTION_INSIDE_TAGS = [
    'hyperlink',
    'link',
    'comment',
    'timeline',
    'gallery',
    'math',
    'pre',
    'startspace',
    'source',
    'nowiki',
]

# Regular expressions listed under the 'inside' exception key.
_EXCEPTION_INSIDE = [
    r'(?s)<[^>]+>',
    r'(?s)\{[^\}]+\}',
    r'&[^;]+;',
    r'\[(\w|\W)+\]',
    r'(a|d).C.',
    r'\"(\w|\W)+\"',
    r'(?i)(s\.n\.c|s\.r\.l|s\.a\.s|s\.p\.a)',
]


def _exceptions(text_contains):
    """Return a new 'exceptions' dict with the given 'text-contains' list.

    Every call builds fresh lists, so each fix owns its own containers just
    as it did when the lists were written out literally in each entry.
    """
    return {
        'inside-tags': list(_EXCEPTION_INSIDE_TAGS),
        'inside': list(_EXCEPTION_INSIDE),
        'text-contains': list(text_contains),
    }


# NOTE(review): this statement rebinds ``fixes`` instead of adding keys to an
# existing dictionary.  If the loader defines ``fixes`` before executing this
# file, its earlier entries are discarded - confirm that this is intended.
fixes = {
    # Spelling and punctuation corrections only.
    'errori_comuni': {
        'regex': True,
        'recursive': True,
        'msg': {
            'it': _ERRORI_COMUNI_MSG,
        },
        'replacements': list(_ERRORI_COMUNI_REPLACEMENTS),
        'exceptions': _exceptions(['IagaBot']),
    },

    # Spelling corrections followed by the rules taken from fixes.py.
    'tutti': {
        'regex': True,
        'recursive': True,
        'msg': {
            'it': _ERRORI_COMUNI_MSG,
        },
        'replacements': (list(_ERRORI_COMUNI_REPLACEMENTS)
                         + list(_FIXES_PY_REPLACEMENTS)),
        'exceptions': _exceptions([r'\[CDATA\[', 'IagaBot']),
    },

    # Italian names for the image namespace and the standard section titles.
    'sostituzioni_standard': {
        'regex': True,
        'msg': {
            'it': u'Bot: [[Utente:IagaBot/Sostituzioni_standard|sostituzioni standard]]',
        },
        'replacements': [
            (u'\\[\\[[Ii]mage:(.*?)\\]\\]', u'[[Immagine:\\1]]'),
            (u'== ?[vV]edi [aA]nche ?==', u'== Voci correlate =='),
            (u'== ?[lL]ink [eE]sterni ?==', u'== Collegamenti esterni =='),
            (u'== ?[vV]oci [Cc]orrelate ?==', u'== Voci correlate =='),
            (u'== ?[cC]ollegamenti [Ee]sterni ?==', u'== Collegamenti esterni =='),
        ],
    },
}