사용자:Utoleetest/libre wikistat

< 사용자:Utoleetest
Utoleetest (토론 | 기여)님의 2020년 8월 22일 (토) 01:24 판 (clean up)

위키/목록에서 위키의 문서 수/총 페이지 수 통계를 갱신할 때 이용되는 파이썬 스크립트입니다. Pywikibot을 통해 돌아갈 수 있게 설계했습니다.

사용방법

  1. 우선 파이썬을 설치한 후, 시스템의 환경변수를 추가해서 커맨드 창에서 파이썬이 작동하게 설정합니다.
  2. 그 다음 PIP 설치방법 안내 페이지를 이용해서 PIP 패키지를 설치합니다.
  3. PIP를 설치했으면 커맨드 창에 pip install selenium을 입력해 selenium 패키지를 설치합니다.
  4. 다음 링크를 이용해서 크롬 브라우저용 Chromedriver를 다운로드 받습니다. 반드시 pwb.py가 있는 곳에 chromedriver 실행 프로그램이 있어야 합니다.
  5. 다음 아래의 소스코드를 복사/붙여넣기 후에 pywikibot이 설치된 폴더에서 scripts/userscripts에 libre_wikistat.py로 저장합니다.(커스텀 스크립트를 저장할 수 있는 곳)
  6. 다 준비가 되었다면 cd C:/(Pywikibot이 설치된 폴더) 명령어를 이용해 Pywikibot이 있는 폴더로 이동한 뒤에 커맨드 창에
> python pwb.py libre_wikistat

이라고 입력하면 작동합니다. 상당히 지저분한 코드인 점을 유감스럽게 생각합니다.

스크립트 소스

import datetime
import re
import time

import pywikibot
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

def digit_div(arg):
    """Format *arg* as an integer with thousands separators.

    Args:
        arg: Anything ``int()`` accepts (an ``int`` or a string of digits).

    Returns:
        The value rendered with ``,`` between groups of three digits, e.g.
        ``'1234567'`` -> ``'1,234,567'``. If *arg* cannot be converted to an
        integer it is returned unchanged.
    """
    try:
        return format(int(arg), ',')
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures fall back to the raw value; anything else
        # (e.g. KeyboardInterrupt) must propagate to the caller.
        return arg

def zeroinput(_str):
    """Left-pad *_str* with ``'0'`` until it is at least two characters long.

    An empty string becomes ``'00'``, a single character gets one leading
    zero, and anything longer is returned as is.
    """
    return _str.rjust(2, '0')

# Keys of the wikis for which both an article count and a total page count
# are collected.
wiki_pattern = [
    'namu', 'nuri', 'alpha', 'dc', 'veda', 'open', 'chan', 'zeta', 'bigfo',
    'femi', 'wikt', 'wiks', 'wiki', 'poke', 'footk', 'jwiki', 'enwiki',
    'enwikt', 'enwikb', 'enwikn', 'enwikq', 'enwiks', 'mw', 'wimeta',
    'common', 'widata', 'uncyclo', 'starwars', 'fandom', 'bulba', 'trope',
    'jawiki', 'zhwiki',
]
# Keys of the wikis for which only a single figure is collected.
wiki_sol_pattern = ['scpko', 'nosm', 'scp', 'nico', 'baidu']

# Every counter starts out as the string '0' and is overwritten once the
# corresponding site has been scraped.
article_num = dict.fromkeys(wiki_pattern + wiki_sol_pattern, '0')
page_num = dict.fromkeys(wiki_pattern, '0')
        

# Scrape the article / page counters of every tracked wiki into
# ``article_num`` and ``page_num``, then build the timestamp markers
# ``msg_date`` and ``msg_time`` used when the list page is rewritten.

driver = webdriver.Chrome()

# Cells read from a MediaWiki Special:Statistics page: the first goes into
# ``article_num`` and the second into ``page_num``.
MW_ARTICLE_XPATH = '//*[@id="mw-content-text"]/table/tbody/tr[2]/td[2]'
MW_PAGE_XPATH = '//*[@id="mw-content-text"]/table/tbody/tr[3]/td[2]'

# (key, statistics URL) of every wiki whose figures are read from the two
# MediaWiki cells above. The cell text is stored as displayed, without
# passing through digit_div().
MEDIAWIKI_SITES = [
    # Nuriwiki
    ('nuri', 'https://nuriwiki.net/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # DC Wiki
    ('dc', 'https://wiki.dcinside.com/wiki/%ed%8a%b9%ec%88%98:%ed%86%b5%ea%b3%84'),
    # Wiki-chan
    ('chan', 'https://wiki-chan.net/w/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Zetawiki
    ('zeta', 'https://www.zetawiki.com/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Bigforest
    ('bigfo', 'https://bigforest.miraheze.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Femiwiki
    ('femi', 'https://femiwiki.com/w/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Korean Wiktionary
    ('wikt', 'https://ko.wiktionary.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Korean Wikisource
    ('wiks', 'https://ko.wikisource.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Korean Wikipedia
    ('wiki', 'https://ko.wikipedia.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Pokemon wiki (Korean, Fandom)
    ('poke', 'https://pokemon.fandom.com/ko/wiki/%ED%8A%B9%EC%88%98%EA%B8%B0%EB%8A%A5:%ED%86%B5%EA%B3%84'),
    # Football K wiki
    ('footk', 'https://footballk.net/mediawiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Jwiki
    ('jwiki', 'http://jwiki.kr/wiki/index.php/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # English Wikipedia
    ('enwiki', 'https://en.wikipedia.org/wiki/Special:Statistics'),
    # English Wiktionary
    ('enwikt', 'https://en.wiktionary.org/wiki/Special:Statistics'),
    # English Wikibooks
    ('enwikb', 'https://en.wikibooks.org/wiki/Special:Statistics'),
    # English Wikinews
    ('enwikn', 'https://en.wikinews.org/wiki/Special:Statistics'),
    # English Wikiquote
    ('enwikq', 'https://en.wikiquote.org/wiki/Special:Statistics'),
    # English Wikisource
    # NOTE(review): the host is wikisource.org, not en.wikisource.org --
    # confirm this is the site the 'enwiks' figure is meant to come from.
    ('enwiks', 'https://wikisource.org/wiki/Special:Statistics'),
    # MediaWiki.org
    ('mw', 'https://www.mediawiki.org/wiki/Special:Statistics'),
    # Wikimedia Meta
    ('wimeta', 'https://meta.wikimedia.org/wiki/Special:Statistics'),
    # Wikimedia Commons
    ('common', 'https://commons.wikimedia.org/wiki/Special:Statistics'),
    # Wikidata
    ('widata', 'https://www.wikidata.org/wiki/Special:Statistics'),
    # Uncyclopedia
    ('uncyclo', 'https://en.uncyclopedia.co/wiki/Special:Statistics'),
    # Wookieepedia
    ('starwars', 'https://starwars.fandom.com/wiki/Special:Statistics'),
    # Fandom Community Central
    ('fandom', 'https://community.fandom.com/wiki/Special:Statistics'),
    # Bulbapedia
    ('bulba', 'https://bulbapedia.bulbagarden.net/wiki/Special:Statistics'),
    # Japanese Wikipedia
    ('jawiki', 'https://ja.wikipedia.org/wiki/%E7%89%B9%E5%88%A5:%E7%B5%B1%E8%A8%88'),
    # Chinese Wikipedia
    ('zhwiki', 'https://zh.wikipedia.org/wiki/Special:Statistics'),
]


def _text_at(xpath):
    """Return the text of the first element matching *xpath* on the current page."""
    return driver.find_element(By.XPATH, xpath).text


def _first_text(xpaths):
    """Return the text at the first of *xpaths* that exists on the current page.

    Raises:
        NoSuchElementException: if none of the candidates matches.
    """
    for xpath in xpaths[:-1]:
        try:
            return _text_at(xpath)
        except NoSuchElementException:
            continue
    # Let a miss on the last candidate propagate to the caller.
    return _text_at(xpaths[-1])


try:
    # No-Smok wiki -- single figure
    driver.get('http://no-smok.net/nsmk/SystemInfo')
    article_num['nosm'] = digit_div(
        _text_at('//*[@id="aline-1"]/table/tbody/tr[4]/td/span'))

    # SCP Foundation (Korean) -- single figure
    driver.get('http://ko.scp-wiki.net/system:list-all-pages')
    scpko_text = _text_at('//*[@id="page-content"]/div[1]/p')
    article_num['scpko'] = digit_div(scpko_text.replace('전체 페이지 수: ', ''))

    # Namuwiki
    driver.get('https://namu.wiki/w/%EB%82%98%EB%AC%B4%EC%9C%84%ED%82%A4:%ED%86%B5%EA%B3%84')
    # Fixed pause after navigation, presumably so the page can finish
    # rendering before it is queried (same for every sleep below).
    time.sleep(1)
    page_num['namu'] = digit_div(_text_at(
        '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[5]/table/tbody/tr[1]/td[2]/div'))
    article_num['namu'] = digit_div(_text_at(
        '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[5]/table/tbody/tr[2]/td[2]/div'))

    # Alphawiki
    driver.get('https://awiki.theseed.io/w/%EC%95%8C%ED%8C%8C%EC%9C%84%ED%82%A4:%ED%86%B5%EA%B3%84/i')
    time.sleep(1)
    page_num['alpha'] = digit_div(_text_at(
        '//*[@id="app"]/div/div[3]/div[2]/div[2]/div/div[2]/div/div/div[1]/table/tbody/tr[2]/td[3]/div'))
    article_num['alpha'] = digit_div(_text_at(
        '//*[@id="app"]/div/div[3]/div[2]/div[2]/div/div[2]/div/div/div[1]/table/tbody/tr[3]/td[2]/div'))

    # Rigveda wiki
    driver.get('http://rigvedawiki.net/w/%EC%8B%9C%EC%8A%A4%ED%85%9C%EC%A0%95%EB%B3%B4')
    time.sleep(1)
    page_num['veda'] = digit_div(_text_at('//*[@id="macro-1"]'))
    article_num['veda'] = digit_div(_text_at('//*[@id="macro-3"]'))

    # Openwiki: the figures are looked up under div[20] first and under
    # div[21] when that element does not exist.
    driver.get('http://openwiki.kr/about')
    time.sleep(1)
    open_first = _first_text([
        '//*[@id="dokuwiki__content"]/div[20]/ul/li[2]/ul/li[1]/div/em',
        '//*[@id="dokuwiki__content"]/div[21]/ul/li[2]/ul/li[1]/div/em',
    ])
    open_second = _first_text([
        '//*[@id="dokuwiki__content"]/div[20]/ul/li[2]/ul/li[2]/div/em',
        '//*[@id="dokuwiki__content"]/div[21]/ul/li[2]/ul/li[2]/div/em',
    ])
    article_num['open'] = digit_div(open_first)
    try:
        # The page total is the sum of the two list entries.
        page_num['open'] = format(int(open_first) + int(open_second), ',')
    except ValueError:
        page_num['open'] = '0'

    # All MediaWiki-based wikis share one page layout.
    for key, url in MEDIAWIKI_SITES:
        driver.get(url)
        time.sleep(1)
        article_num[key] = _text_at(MW_ARTICLE_XPATH)
        page_num[key] = _text_at(MW_PAGE_XPATH)

    # TV Tropes: the article figure is on the third line of the second
    # paragraph, the total in the third paragraph.
    driver.get('https://tvtropes.org/pmwiki/articlecount.php')
    time.sleep(1)
    trope_lines = _text_at('//*[@id="wikimiddle"]/p[2]').splitlines()
    article_num['trope'] = digit_div(trope_lines[2].replace(': Main', ''))
    page_num['trope'] = digit_div(
        _text_at('//*[@id="wikimiddle"]/p[3]').replace('Total: ', ''))

    # SCP Foundation (English) -- single figure, estimated as
    # (rows on listing page 1) * (cnt2 - 1) + (rows on listing page 500).
    # NOTE(review): presumably /p/500 lands on the last listing page and
    # div[38]/span[8] holds the number of listing pages -- confirm.
    scp_rows_xpath = '//*[@id="page-content"]/div/div'
    driver.get('http://www.scpwiki.com/system:list-all-pages/p/500')
    cnt1 = len(driver.find_elements(By.XPATH, scp_rows_xpath))
    cnt2 = int(_text_at('//*[@id="page-content"]/div/div[38]/span[8]'))
    driver.get('http://www.scpwiki.com/system:list-all-pages/p/1')
    cnt3 = len(driver.find_elements(By.XPATH, scp_rows_xpath))
    article_num['scp'] = digit_div(cnt3 * (cnt2 - 1) + cnt1)

    # Niconico Pedia -- single figure
    driver.get('https://dic.nicovideo.jp/')
    time.sleep(1)
    nico_text = _text_at('//*[@id="basebody"]/footer/div/div[1]/ul/li[1]')
    article_num['nico'] = digit_div(nico_text.replace('記事:', ''))

    # Baidu Baike -- single figure, stored as displayed
    driver.get('https://baike.baidu.com')
    time.sleep(3)
    article_num['baidu'] = _text_at('//*[@id="lemmaNum"]')
finally:
    # Always end the browser session, even when a lookup fails part-way.
    driver.quit()

# Time the statistics were collected. The marker is labelled "(KST)", so the
# clock is read in UTC+9 explicitly instead of in the machine's local zone.
KST = datetime.timezone(datetime.timedelta(hours=9))
x = datetime.datetime.now(KST)
# Month and day are written without zero padding.
msg_date = '<!--date-->{}-{}-{}'.format(x.year, x.month, x.day)
msg_time = '<!--time-->{:02d}:{:02d}(KST)'.format(x.hour, x.minute)

print('날짜:', msg_date)
print('시간:', msg_time)

# Load the current wikitext of the list page.
site = pywikibot.Site()
page = pywikibot.Page(site, "위키/목록")
txt = page.text

# Each figure sits right after an HTML-comment marker in the wikitext:
# <!--key--> for article counts and <!--key_2--> for page counts. The digits
# and commas following a marker are replaced with the fresh value.
stat_updates = (
    [('<!--' + pat + '-->', article_num[pat]) for pat in wiki_pattern]
    + [('<!--' + pat + '_2-->', page_num[pat]) for pat in wiki_pattern]
    + [('<!--' + pat + '-->', article_num[pat]) for pat in wiki_sol_pattern]
)
for marker, value in stat_updates:
    txt = re.sub(marker + r'[0-9,]*', marker + value, txt)

# Replace the previous date and time stamps with the new ones.
stamp_updates = (
    (r'<!--date-->[0-9]{4,}\-[0-9]{1,2}\-[0-9]{1,2}', msg_date),
    (r'<!--time-->[0-9]{1,2}:[0-9]{1,2}\(KST\)', msg_time),
)
for old_stamp, new_stamp in stamp_updates:
    txt = re.sub(old_stamp, new_stamp, txt)

# Write the updated wikitext back to the list page.
page.text = txt
page.save('ko:통계 갱신')