크롤링 2020 트능(트렌드 능력고사)
2020. 7. 14. 18:39ㆍ0x0B Web Hacking
728x90
심심해서, 일하다가 잠시 쉬는 시간에 짜 봤다.
일단.. 난 70점 나왔었다. 트렌드 모르겠다 ~
import urllib.parse
import urllib.request
import urllib.error
import re
from bs4 import BeautifulSoup
class quiz(object):
    """Wrap an answer label so it can be used as a dictionary key.

    The class defines neither ``__eq__`` nor ``__hash__``, so instances are
    compared by identity: two ``quiz`` objects with the same ``name`` are
    distinct keys and do not overwrite each other in a ``dict``.
    """

    def __init__(self, name):
        """Store *name*, the label text this object represents."""
        self.name = name

    def __str__(self):
        """Return the bare label text."""
        # str() keeps this working when the name is not already a string.
        return str(self.name)

    def __repr__(self):
        """Return the label text wrapped in single quotes."""
        return f"'{self.name}'"
# Scrape the quiz page and print, for every question, its answer labels
# together with the number found in the matching <input> tag.
url = "https://www.trendtest.co.kr/Test"

# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(url) as response:
    r = response.read()
parse = BeautifulSoup(r, "html.parser")

# Question titles: the <h3> markup with its opening tag removed, cut off at
# the trailing grey <span>.
tags = parse('h3')
quiz_tmp = []
for tag in tags:
    tmp = str(tag).replace("<h3 class=\"fw400 tit\">", "").split("<span class=\"fc_lgrey\">")[0]
    quiz_tmp.append(tmp.strip('\r\n'))

# Scores: every run of digits found after the first "value" in each <input>
# tag's markup. Inputs whose markup does not contain "value" are skipped.
tags = parse('input')
score_tmp = []
for tag in tags:
    pieces = str(tag).split('value')
    if len(pieces) < 2:
        continue
    score_tmp += re.findall(r"\d+", pieces[1])

# Answer labels: the text of every <span class="fw700 label">.
tags = parse('span')
data_tmp = [
    str(tag).replace("<span class=\"fw700 label\">", "").replace("</span>", "")
    for tag in tags
    if "fw700 label" in str(tag)
]

# quiz objects hash by identity, so repeated label texts stay separate keys.
# zip() stops at the shorter list instead of raising IndexError when the page
# yields fewer scores than labels.
_dict = {quiz(label): score for label, score in zip(data_tmp, score_tmp)}

# Labels are printed in groups of four; each group starts with a blank line
# and the title of the question it belongs to.
for index, (key, value) in enumerate(_dict.items()):
    if index % 4 == 0:
        print("")
        i = index // 4
        if i < len(quiz_tmp):  # tolerate fewer titles than label groups
            print(quiz_tmp[i])
    print(f'{key} - {value}')
셀레니움을 이용하여 자동화에도 도전해보았다.
목표: 임의로 라디오 버튼 하나를 클릭한 후 '다음으로' 버튼을 클릭한다.
__author__ = 'c0nstant'
import unittest
import platform
from bs4 import BeautifulSoup
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
'''
find_elements_by_class_name
find_element_by_css_selector
'''
class MyChkbox(unittest.TestCase):
    """Selenium test that opens the quiz page and clicks one answer."""

    def setUp(self):
        """Start a Firefox WebDriver session and remember the quiz URL."""
        self.driver = webdriver.Firefox()
        self.url = "https://www.trendtest.co.kr/Test"

    def test_Chkbox(self):
        """Load the page, then click the second "box" and second "letter" element."""
        # Local import keeps this class self-contained. The generic
        # find_element(By..., value) API replaces the find_element_by_*
        # helpers, which newer Selenium releases no longer provide.
        from selenium.webdriver.common.by import By

        driver = self.driver
        driver.set_window_position(0, 0)
        driver.maximize_window()
        driver.get(self.url)

        # Zoom the page out to 50% via the Firefox-specific CSS transform
        # (presumably so more of the quiz is visible at once - TODO confirm).
        driver.execute_script('document.body.style.MozTransform = "scale(0.5)";')
        driver.execute_script('document.body.style.MozTransformOrigin = "0 0";')
        print(platform.system())

        # Look up the first <ul> (per the original note it holds the four
        # answer labels); the lookup raises if the list is missing.
        driver.find_element(By.CSS_SELECTOR, "ul")

        driver.find_elements(By.CLASS_NAME, "box")[1].click()
        driver.find_elements(By.CLASS_NAME, "letter")[1].click()

    def tearDown(self):
        """Close the browser session opened in setUp."""
        self.driver.quit()


if __name__ == '__main__':
    unittest.main()
전체 자동화
__author__ = 'c0nstant'
import urllib.parse,urllib.request,urllib.error
import unittest
import platform
import re
from bs4 import BeautifulSoup
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
'''
find_elements_by_class_name
find_element_by_css_selector
'''
class quiz(object):
    """Wrap an answer label so it can be used as a dictionary key.

    The class defines neither ``__eq__`` nor ``__hash__``, so instances are
    compared by identity: two ``quiz`` objects with the same ``name`` are
    distinct keys and do not overwrite each other in a ``dict``.
    """

    def __init__(self, name):
        """Store *name*, the label text this object represents."""
        self.name = name

    def __str__(self):
        """Return the bare label text."""
        # str() keeps this working when the name is not already a string.
        return str(self.name)

    def __repr__(self):
        """Return the label text wrapped in single quotes."""
        return f"'{self.name}'"
class MyChkbox(unittest.TestCase):
    """Selenium run that clicks, round by round, the options scored 6 or 7."""

    def setUp(self):
        """Start a Firefox WebDriver session and remember the quiz URL."""
        self.driver = webdriver.Firefox()
        self.url = "https://www.trendtest.co.kr/Test"

    def _scrape_labels_and_scores(self):
        """Download the quiz page and return ``(labels, scores)``.

        ``labels`` holds the text of every ``<span class="fw700 label">``.
        ``scores`` holds every run of digits (as strings) found after the
        first "value" in each ``<input>`` tag's markup.
        """
        # The context manager closes the HTTP response after reading it.
        with urllib.request.urlopen(self.url) as response:
            parse = BeautifulSoup(response.read(), "html.parser")

        scores = []
        for tag in parse('input'):
            pieces = str(tag).split('value')
            if len(pieces) < 2:  # markup does not contain "value"
                continue
            scores += re.findall(r"\d+", pieces[1])

        labels = [
            str(tag).replace("<span class=\"fw700 label\">", "").replace("</span>", "")
            for tag in parse('span')
            if "fw700 label" in str(tag)
        ]
        return labels, scores

    def test_Chkbox(self):
        """Open the quiz, pick every option scored 6 or 7, and click through."""
        # Local import keeps this class self-contained. The generic
        # find_element(By..., value) API replaces the find_element_by_*
        # helpers, which newer Selenium releases no longer provide.
        from selenium.webdriver.common.by import By

        driver = self.driver
        driver.set_window_position(0, 0)
        driver.maximize_window()
        driver.get(self.url)

        # Zoom the page out to 50% via the Firefox-specific CSS transform
        # (presumably so more of the quiz is visible at once - TODO confirm).
        driver.execute_script('document.body.style.MozTransform = "scale(0.5)";')
        driver.execute_script('document.body.style.MozTransformOrigin = "0 0";')
        print(platform.system())

        # Look up the first <ul> (per the original note it holds the four
        # answer labels of round 1); the lookup raises if it is missing.
        driver.find_element(By.CSS_SELECTOR, "ul")

        # Layout noted by the original author:
        #   "box" elements:    [0]-[3] are round 1, [4]-[7] round 2, ...
        #   "letter" elements: [1]-[4] are round 1, [5]-[8] round 2, ...
        # letter_index therefore holds the first "letter" index of each round.
        letter_index = list(range(1, 64, 4))
        print(f'letter : {letter_index}')

        labels, scores = self._scrape_labels_and_scores()

        # Record the position of every option whose score is '6' or '7'
        # (presumably the best answers - TODO confirm). zip() stops at the
        # shorter list, so a label/score count mismatch cannot raise IndexError.
        box = []
        for index, (key, value) in enumerate(zip(labels, scores)):
            if index % 4 == 0:
                print("")
            if value in ('6', '7'):
                print(f'{index} : {key} - {value}')
                box.append(index)
        print(f'Goal : {box}')

        # Elements are looked up again on every iteration, as in the original
        # (the page may re-render between rounds - TODO confirm).
        for box_pos, letter_pos in zip(box, letter_index):
            driver.find_elements(By.CLASS_NAME, "box")[box_pos].click()
            driver.find_elements(By.CLASS_NAME, "letter")[letter_pos].click()
            sleep(0.5)
        driver.save_screenshot('./test.png')

    def tearDown(self):
        """Close the browser session opened in setUp."""
        self.driver.quit()


if __name__ == '__main__':
    unittest.main()
'0x0B Web Hacking' 카테고리의 다른 글
SQLI 복습 (0) | 2021.03.23 |
---|---|
HTB - Freelancer (0) | 2021.02.12 |
sqli practice (0) | 2020.07.14 |
Hackerone CMS V2 Flag02 (0) | 2020.06.29 |
HackerFactory 10번문제 (0) | 2020.01.25 |