사용자:Utoleetest/libre wikistat

< 사용자:Utoleetest
Utoleetest (토론 | 기여)님의 2020년 8월 2일 (일) 03:24 판 (코드 작성)
(차이) ← 이전 판 | 최신판 (차이) | 다음 판 → (차이)

위키/목록에서 위키의 문서 수/총 페이지 수 통계를 갱신할 때 이용되는 파이썬 스크립트입니다. Pywikibot을 통해 돌아갈 수 있게 설계했습니다.

사용방법

  1. 우선 파이썬을 설치한 후, 시스템의 환경변수를 추가해서 커맨드 창에서 파이썬이 작동하게 설정합니다.
  2. 그 다음 PIP 설치방법 안내 페이지를 이용해서 PIP 패키지를 설치합니다.
  3. PIP를 설치했으면 커맨드 창에 pip install selenium을 입력해 selenium 패키지를 설치합니다.
  4. 다음 링크를 이용해서 크롬 브라우저용 Chromedriver를 다운로드 받습니다. 반드시 pwb.py가 있는 곳에 chromedriver 실행 프로그램이 있어야 합니다.
  5. 다음 아래의 소스코드를 복사/붙여넣기 후에 pywikibot이 설치된 폴더에서 scripts/userscripts에 libre_wikistat.py로 저장합니다.(커스텀 스크립트를 저장할 수 있는 곳)
  6. 다 준비가 되었다면 cd C:/(Pywikibot이 설치된 폴더) 명령어를 이용해 Pywikibot이 있는 폴더로 이동한 뒤에 커맨드 창에
> python pwb.py libre_wikistat

이라고 입력하면 작동합니다. 상당히 지저분한 코드이며, 니코니코 대백과, 바이두 백과 같은 일부 웹사이트는 크롤링에 실패해서 수동으로 작성해주셔야 합니다.

스크립트 소스

import datetime
import re
import time

import pywikibot
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

def digit_div(arg):
    """Return *arg* as an integer string with comma thousands separators.

    *arg* may be anything ``int()`` accepts (an int, or a string of digits
    with optional surrounding whitespace or sign).  When it cannot be
    converted -- for example text that already contains commas, an empty
    string or ``None`` -- the argument is returned unchanged so the caller
    can still use the raw value.
    """
    try:
        return format(int(arg), ',')
    # Only conversion failures are expected here; a bare ``except`` would
    # also hide KeyboardInterrupt/SystemExit.
    except (TypeError, ValueError, OverflowError):
        return arg

# Keys of the wikis for which both an article count and a page count are kept.
wiki_pattern = [
    'namu', 'nuri', 'alpha', 'dc', 'veda', 'open', 'chan', 'zeta', 'bigfo',
    'femi', 'wikt', 'wiks', 'wiki', 'poke', 'footk', 'jwiki', 'enwiki',
    'enwikt', 'enwikb', 'enwikn', 'enwikq', 'enwiks', 'mw', 'wimeta',
    'common', 'widata', 'uncyclo', 'starwars', 'fandom', 'bulba', 'trope',
    'jawiki', 'zhwiki',
]
# Keys of the wikis for which only a single statistic is kept.
wiki_sol_pattern = ['scpko', 'nosm']

# Every counter starts out as the string '0'.  article_num covers both key
# lists, page_num only the wikis in wiki_pattern.
article_num = dict.fromkeys(wiki_pattern + wiki_sol_pattern, '0')
page_num = dict.fromkeys(wiki_pattern, '0')
        

# Start the Chrome session that every scraping step below reuses.
driver = webdriver.Chrome()

# Elements are located with find_element(By.XPATH, ...): the
# find_element_by_* helpers were removed in Selenium 4.3, while this form
# is also available in older releases.

# NoSmoke statistics -- single figure only
driver.get('http://no-smok.net/nsmk/SystemInfo')
time.sleep(1)  # give the page a moment to render before querying the DOM
nos_1 = driver.find_element(By.XPATH, '//*[@id="aline-1"]/table/tbody/tr[4]/td/span')
article_num['nosm'] = digit_div(nos_1.text)
time.sleep(1)

# SCP Korean wiki statistics -- single figure only
driver.get('http://ko.scp-wiki.net/system:list-all-pages')
time.sleep(1)
scpko_1 = driver.find_element(By.XPATH, '//*[@id="page-content"]/div[1]/p')
# Strip the Korean "total pages:" label so that only the number is left.
article_num['scpko'] = digit_div(scpko_1.text.replace('전체 페이지 수: ', ''))
time.sleep(1)


# Namuwiki statistics
# (find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.)
driver.get('https://namu.wiki/w/%EB%82%98%EB%AC%B4%EC%9C%84%ED%82%A4:%ED%86%B5%EA%B3%84')
time.sleep(1)
namu_1 = driver.find_element(
    By.XPATH,
    '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[5]/table/tbody/tr[1]/td[2]/div')
page_num['namu'] = digit_div(namu_1.text)
namu_2 = driver.find_element(
    By.XPATH,
    '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[5]/table/tbody/tr[2]/td[2]/div')
article_num['namu'] = digit_div(namu_2.text)
time.sleep(1)

# Alphawiki statistics
driver.get('https://awiki.theseed.io/w/%EC%95%8C%ED%8C%8C%EC%9C%84%ED%82%A4:%ED%86%B5%EA%B3%84/i')
time.sleep(1)
# NOTE(review): the page count is read from td[3] while the article count
# below is read from td[2] -- confirm against the live table.
alpha_1 = driver.find_element(
    By.XPATH,
    '//*[@id="app"]/div/div[3]/div[2]/div[2]/div/div[2]/div/div/div[1]/table/tbody/tr[2]/td[3]/div')
page_num['alpha'] = digit_div(alpha_1.text)
alpha_2 = driver.find_element(
    By.XPATH,
    '//*[@id="app"]/div/div[3]/div[2]/div[2]/div/div[2]/div/div/div[1]/table/tbody/tr[3]/td[2]/div')
article_num['alpha'] = digit_div(alpha_2.text)
time.sleep(1)

# Nuriwiki and DCwiki statistics.  Both pages are read with the same two
# table cells, so they share one loop.
# (find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.)
for wiki_key, stats_url in (
    ('nuri', 'https://nuriwiki.net/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    ('dc', 'https://wiki.dcinside.com/wiki/%ed%8a%b9%ec%88%98:%ed%86%b5%ea%b3%84'),
):
    driver.get(stats_url)
    time.sleep(1)  # give the page a moment to render before querying the DOM
    # Row 2 holds the article count, row 3 the total page count; the cell
    # text is stored exactly as shown on the page.
    article_num[wiki_key] = driver.find_element(
        By.XPATH, '//*[@id="mw-content-text"]/table/tbody/tr[2]/td[2]').text
    page_num[wiki_key] = driver.find_element(
        By.XPATH, '//*[@id="mw-content-text"]/table/tbody/tr[3]/td[2]').text
    time.sleep(1)

# Rigveda wiki statistics
# (find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.)
driver.get('http://rigvedawiki.net/w/%EC%8B%9C%EC%8A%A4%ED%85%9C%EC%A0%95%EB%B3%B4')
time.sleep(1)
veda_1 = driver.find_element(By.XPATH, '//*[@id="macro-1"]')
page_num['veda'] = digit_div(veda_1.text)
veda_2 = driver.find_element(By.XPATH, '//*[@id="macro-3"]')
article_num['veda'] = digit_div(veda_2.text)
time.sleep(1)

# Openwiki statistics
driver.get('http://openwiki.kr/about')
time.sleep(1)
# Each figure is looked up under div[20] first and under div[21] as a
# fallback.
openwiki_xpath1 = ['//*[@id="dokuwiki__content"]/div[20]/ul/li[2]/ul/li[1]/div/em',
                   '//*[@id="dokuwiki__content"]/div[21]/ul/li[2]/ul/li[1]/div/em']
try:
    open_1 = driver.find_element(By.XPATH, openwiki_xpath1[0])
except NoSuchElementException:
    open_1 = driver.find_element(By.XPATH, openwiki_xpath1[1])
open_1_text = open_1.text
article_num['open'] = digit_div(open_1_text)

openwiki_xpath2 = ['//*[@id="dokuwiki__content"]/div[20]/ul/li[2]/ul/li[2]/div/em',
                   '//*[@id="dokuwiki__content"]/div[21]/ul/li[2]/ul/li[2]/div/em']
try:
    open_2 = driver.find_element(By.XPATH, openwiki_xpath2[0])
except NoSuchElementException:
    open_2 = driver.find_element(By.XPATH, openwiki_xpath2[1])

# The page count is the sum of the two figures; fall back to '0' when
# either of them is not a plain integer.
try:
    page_num['open'] = format(int(open_1_text) + int(open_2.text), ',')
except ValueError:
    page_num['open'] = '0'
time.sleep(1)

# Statistics pages that are all read the same way: row 2 of the statistics
# table gives the article count and row 3 the total page count.  The sites
# are visited in the listed order and the cell text is stored as shown.
# (find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.)
MW_ARTICLE_XPATH = '//*[@id="mw-content-text"]/table/tbody/tr[2]/td[2]'
MW_PAGE_XPATH = '//*[@id="mw-content-text"]/table/tbody/tr[3]/td[2]'

MEDIAWIKI_STATS_URLS = (
    # Wiki-chan
    ('chan', 'https://wiki-chan.net/w/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Zetawiki
    ('zeta', 'https://www.zetawiki.com/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Bigforest encyclopedia
    ('bigfo', 'https://bigforest.miraheze.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Femiwiki
    ('femi', 'https://femiwiki.com/w/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Korean Wiktionary
    ('wikt', 'https://ko.wiktionary.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Korean Wikisource
    ('wiks', 'https://ko.wikisource.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Korean Wikipedia
    ('wiki', 'https://ko.wikipedia.org/wiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Pokemon wiki (Korean)
    ('poke', 'https://pokemon.fandom.com/ko/wiki/%ED%8A%B9%EC%88%98%EA%B8%B0%EB%8A%A5:%ED%86%B5%EA%B3%84'),
    # FootballK wiki
    ('footk', 'https://footballk.net/mediawiki/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # Jwiki
    ('jwiki', 'http://jwiki.kr/wiki/index.php/%ED%8A%B9%EC%88%98:%ED%86%B5%EA%B3%84'),
    # English Wikipedia
    ('enwiki', 'https://en.wikipedia.org/wiki/Special:Statistics'),
    # English Wiktionary
    ('enwikt', 'https://en.wiktionary.org/wiki/Special:Statistics'),
    # English Wikibooks
    ('enwikb', 'https://en.wikibooks.org/wiki/Special:Statistics'),
    # English Wikinews
    ('enwikn', 'https://en.wikinews.org/wiki/Special:Statistics'),
    # English Wikiquote
    ('enwikq', 'https://en.wikiquote.org/wiki/Special:Statistics'),
    # English Wikisource
    # NOTE(review): the host is wikisource.org, not en.wikisource.org,
    # although the key is 'enwiks' -- confirm which site is intended.
    ('enwiks', 'https://wikisource.org/wiki/Special:Statistics'),
    # MediaWiki.org
    ('mw', 'https://www.mediawiki.org/wiki/Special:Statistics'),
    # Wikimedia Meta-Wiki
    ('wimeta', 'https://meta.wikimedia.org/wiki/Special:Statistics'),
    # Wikimedia Commons
    ('common', 'https://commons.wikimedia.org/wiki/Special:Statistics'),
    # Wikidata
    ('widata', 'https://www.wikidata.org/wiki/Special:Statistics'),
    # Uncyclopedia
    ('uncyclo', 'https://en.uncyclopedia.co/wiki/Special:Statistics'),
    # Wookieepedia
    ('starwars', 'https://starwars.fandom.com/wiki/Special:Statistics'),
    # Fandom Community Central
    ('fandom', 'https://community.fandom.com/wiki/Special:Statistics'),
    # Bulbapedia
    ('bulba', 'https://bulbapedia.bulbagarden.net/wiki/Special:Statistics'),
)

for wiki_key, stats_url in MEDIAWIKI_STATS_URLS:
    driver.get(stats_url)
    time.sleep(1)  # give the page a moment to render before querying the DOM
    article_num[wiki_key] = driver.find_element(By.XPATH, MW_ARTICLE_XPATH).text
    page_num[wiki_key] = driver.find_element(By.XPATH, MW_PAGE_XPATH).text
    time.sleep(1)

# TV Tropes statistics
# (find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.)

driver.get('https://tvtropes.org/pmwiki/articlecount.php')
time.sleep(1)
trope_1 = driver.find_element(By.XPATH, '//*[@id="wikimiddle"]/p[2]').text
# The article count is taken from the third line of that paragraph, with
# the ': Main' suffix removed.
article_tropes = trope_1.splitlines()
article_num['trope'] = digit_div(article_tropes[2].replace(': Main', ''))
# The page count is the next paragraph with the 'Total: ' prefix removed.
# The element is looked up once; the earlier duplicate query and the unused
# intermediate value were dropped.
trope_2 = driver.find_element(By.XPATH, '//*[@id="wikimiddle"]/p[3]')
page_num['trope'] = digit_div(trope_2.text.replace('Total: ', ''))
time.sleep(1)


# Japanese and Chinese Wikipedia statistics.  Both pages are read with the
# same two table cells, so they share one loop.
# (find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.)
for wiki_key, stats_url in (
    ('jawiki', 'https://ja.wikipedia.org/wiki/%E7%89%B9%E5%88%A5:%E7%B5%B1%E8%A8%88'),
    ('zhwiki', 'https://zh.wikipedia.org/wiki/Special:Statistics'),
):
    driver.get(stats_url)
    time.sleep(1)  # give the page a moment to render before querying the DOM
    # Row 2 holds the article count, row 3 the total page count; the cell
    # text is stored exactly as shown on the page.
    article_num[wiki_key] = driver.find_element(
        By.XPATH, '//*[@id="mw-content-text"]/table/tbody/tr[2]/td[2]').text
    page_num[wiki_key] = driver.find_element(
        By.XPATH, '//*[@id="mw-content-text"]/table/tbody/tr[3]/td[2]').text
    time.sleep(1)


# Timestamp of this statistics run.
# The time is written with a "(KST)" label, so it is computed in UTC+9
# explicitly instead of relying on the local clock of the machine.
x = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
msg_date = '<!--date-->' + str(x.year) + '-' + str(x.month) + '-' + str(x.day)
# Minutes are zero-padded so that e.g. 03:05 is written "3:05", not "3:5".
msg_time = '<!--time-->' + '{}:{:02d}'.format(x.hour, x.minute) + '(KST)'

print('날짜:', msg_date)
print('시간:', msg_time)

# quit() ends the whole WebDriver session, including the chromedriver
# process; close() would only close the current browser window.
driver.quit()

# Fetch the current text of the wiki list page.
site = pywikibot.Site()
page = pywikibot.Page(site, "위키/목록")
txt = page.text


def _set_marker_value(text, marker, value):
    """Return *text* with the digits/commas after each *marker* replaced by *value*.

    The replacement is supplied as a callable so that *value*, which comes
    from scraped page text, is inserted literally; passed as a plain string
    it would be treated as a replacement template and any backslash in it
    would be interpreted by ``re.sub``.
    """
    return re.sub(re.escape(marker) + r'[0-9,]*',
                  lambda _match: marker + value,
                  text)


# Update the statistics: article counts first, then page counts (markers
# with a "_2" suffix), then the wikis that only have a single figure.
for pat in wiki_pattern:
    txt = _set_marker_value(txt, '<!--' + pat + '-->', article_num[pat])
for pat in wiki_pattern:
    txt = _set_marker_value(txt, '<!--' + pat + '_2-->', page_num[pat])
for pat in wiki_sol_pattern:
    txt = _set_marker_value(txt, '<!--' + pat + '-->', article_num[pat])

# Update the date and time stamps.
msg_date_old = '<!--date-->' + r'[0-9]{4,}\-[0-9]{1,2}\-[0-9]{1,2}'
msg_time_old = '<!--time-->' + r'[0-9]{1,2}:[0-9]{1,2}\(KST\)'
txt = re.sub(msg_date_old, msg_date, txt)
txt = re.sub(msg_time_old, msg_time, txt)

# Save the modified text back to the wiki list page.
page.text = txt
page.save('ko:통계 갱신')