크롤링 2020 트능(트렌드 능력고사)

2020. 7. 14. 18:39 · 0x0B Web Hacking

728x90

일하다가 심심해서 잠시 쉬는 시간에 짜 봤다.

 

일단.. 난 70점 나왔었다. 트렌드 모르겠다 ~ 

 

import urllib.parse
import urllib.request
import urllib.error
import re
from bs4 import BeautifulSoup

class quiz:
    """Thin wrapper around a label string.

    The class defines neither ``__eq__`` nor ``__hash__``, so when instances
    are used as dict keys, two instances with the same ``name`` stay separate
    keys instead of overwriting each other.
    """

    def __init__(self, name):
        # Label text carried by this instance.
        self.name = name

    def __repr__(self):
        # The name wrapped in single quotes, e.g. 'label'.
        return "".join(("'", self.name, "'"))

    def __str__(self):
        # The bare name, which is what print() and f-strings show.
        return self.name


# Scrape the quiz page once and print every question title followed by its
# answer choices and the number scraped from each choice's <input> markup.

# The listing starts a new question every 4 choices.
CHOICES_PER_QUESTION = 4

url = "https://www.trendtest.co.kr/Test"
# Use the response as a context manager so the connection is closed promptly.
with urllib.request.urlopen(url) as response:
    r = response.read()
parse = BeautifulSoup(r, "html.parser")

# Question titles: drop the opening <h3 class="fw400 tit"> text and keep only
# what precedes the <span class="fc_lgrey"> part.
quiz_tmp = []
for tag in parse('h3'):
    title = str(tag).replace("<h3 class=\"fw400 tit\">", "").split("<span class=\"fc_lgrey\">")[0]
    quiz_tmp.append(title.strip('\r\n'))

# Scores: every run of digits found in the markup between the first and the
# second occurrence of the text "value" in each <input>.
# NOTE(review): this also picks up digits from attributes that follow `value`
# (for example an id); confirm against the live markup.
number_pattern = re.compile(r"\d+")  # raw string: "\d" is an invalid str escape
score_tmp = []
for tag in parse('input'):
    pieces = str(tag).split('value')
    if len(pieces) < 2:
        continue  # this <input> has no "value" text at all
    score_tmp += number_pattern.findall(pieces[1])

# Choice labels: the text inside every <span class="fw700 label">.
data_tmp = [
    str(tag).replace("<span class=\"fw700 label\">", "").replace("</span>", "")
    for tag in parse('span')
    if "fw700 label" in str(tag)
]

# Pair labels with scores positionally. `quiz` objects hash by identity, so
# duplicate label texts do not overwrite each other. zip() stops at the shorter
# list, so a missing score drops trailing labels instead of raising IndexError.
_dict = {quiz(label): score for label, score in zip(data_tmp, score_tmp)}

for index, (key, value) in enumerate(_dict.items()):
    if index % CHOICES_PER_QUESTION == 0:
        print("")
        question_no = index // CHOICES_PER_QUESTION
        # Print the title only when one was scraped for this question.
        if question_no < len(quiz_tmp):
            print(quiz_tmp[question_no])
    print(f'{key} - {value}')

 

셀레니움을 이용하여 자동화에도 도전해보았다.

아래 코드는 임의로 라디오 버튼 하나를 클릭한 후, '다음으로' 버튼을 클릭한다.

__author__ = 'c0nstant'
 
import unittest
import platform

from bs4 import BeautifulSoup
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

'''
find_elements_by_class_name
find_element_by_css_selector
'''

class MyChkbox(unittest.TestCase):
    """Open the quiz page in Firefox and click one "box" and one "letter" element."""

    def setUp(self):
        """Start a Firefox session and remember the quiz URL."""
        self.driver = webdriver.Firefox()
        self.url = "https://www.trendtest.co.kr/Test"

    def test_Chkbox(self):
        """Click the second "box" element, then the second "letter" element."""
        # Imported locally so this class does not depend on the file's import
        # list. `find_element(By..., ...)` replaces the `find_element_by_*`
        # helpers, which were removed in Selenium 4.3.
        from selenium.webdriver.common.by import By

        driver = self.driver

        driver.set_window_position(0, 0)
        driver.maximize_window()

        driver.get(self.url)

        # Scale the page body to 50%, anchored at the top-left corner, using
        # the Mozilla-prefixed CSS transform properties.
        driver.execute_script('document.body.style.MozTransform = "scale(0.5)";')
        driver.execute_script('document.body.style.MozTransformOrigin = "0 0";')

        print(platform.system())

        # Raises if the page has no <ul> at all. The original read this list's
        # text (presumably the first question's answer choices) but never used it.
        driver.find_element(By.CSS_SELECTOR, "ul")

        # Index 1 is the second matching element in each case.
        driver.find_elements(By.CLASS_NAME, "box")[1].click()
        driver.find_elements(By.CLASS_NAME, "letter")[1].click()

    def tearDown(self):
        """Close the browser started in setUp."""
        self.driver.quit()


if __name__ == '__main__':
    unittest.main()

전체 자동화

__author__ = 'c0nstant'
 
import urllib.parse,urllib.request,urllib.error
import unittest
import platform
import re

from bs4 import BeautifulSoup
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

'''
find_elements_by_class_name
find_element_by_css_selector
'''

class quiz:
    """Thin wrapper around a label string.

    The class defines neither ``__eq__`` nor ``__hash__``, so when instances
    are used as dict keys, two instances with the same ``name`` stay separate
    keys instead of overwriting each other.
    """

    def __init__(self, name):
        # Label text carried by this instance.
        self.name = name

    def __repr__(self):
        # The name wrapped in single quotes, e.g. 'label'.
        return "".join(("'", self.name, "'"))

    def __str__(self):
        # The bare name, which is what print() and f-strings show.
        return self.name

class MyChkbox(unittest.TestCase):
    """Walk through the quiz in Firefox, clicking choices picked from scraped values."""

    # The listing is grouped four choices per round (see the index notes in
    # test_Chkbox).
    CHOICES_PER_QUESTION = 4
    # Scraped values that mark a choice as one to click.
    # NOTE(review): presumably the highest-scoring answers; confirm.
    GOAL_SCORES = ('6', '7')

    def setUp(self):
        """Start a Firefox session and remember the quiz URL."""
        self.driver = webdriver.Firefox()
        self.url = "https://www.trendtest.co.kr/Test"

    def _scrape_goal_boxes(self):
        """Fetch the page with urllib and return the indexes of the goal choices.

        An index is the position of a choice label among all
        ``<span class="fw700 label">`` elements. A choice is a goal when the
        number paired with it is in ``GOAL_SCORES``. Matches are printed as
        they are found, with a blank line before every group of four.
        """
        # Context manager so the HTTP response is closed promptly.
        with urllib.request.urlopen(self.url) as response:
            soup = BeautifulSoup(response.read(), "html.parser")

        # Every run of digits between the first and second occurrence of the
        # text "value" in each <input>'s markup.
        number_pattern = re.compile(r"\d+")  # raw string: "\d" is an invalid str escape
        scores = []
        for tag in soup('input'):
            pieces = str(tag).split('value')
            if len(pieces) > 1:
                scores += number_pattern.findall(pieces[1])

        labels = [
            str(tag).replace("<span class=\"fw700 label\">", "").replace("</span>", "")
            for tag in soup('span')
            if "fw700 label" in str(tag)
        ]

        goal = []
        # zip() stops at the shorter list, so a missing score drops trailing
        # labels instead of raising IndexError.
        for index, (label, score) in enumerate(zip(labels, scores)):
            if index % self.CHOICES_PER_QUESTION == 0:
                print("")
            if score in self.GOAL_SCORES:
                print(f'{index} : {label} - {score}')
                goal.append(index)
        return goal

    def test_Chkbox(self):
        """Click each goal "box", then the matching "letter", and save a screenshot."""
        # Imported locally so this class does not depend on the file's import
        # list. `find_element(By..., ...)` replaces the `find_element_by_*`
        # helpers, which were removed in Selenium 4.3.
        from selenium.webdriver.common.by import By

        driver = self.driver

        driver.set_window_position(0, 0)
        driver.maximize_window()

        driver.get(self.url)

        # Scale the page body to 50%, anchored at the top-left corner, using
        # the Mozilla-prefixed CSS transform properties.
        driver.execute_script('document.body.style.MozTransform = "scale(0.5)";')
        driver.execute_script('document.body.style.MozTransformOrigin = "0 0";')

        print(platform.system())

        # Raises if the page has no <ul> at all. The original read this list's
        # text (presumably the first question's answer choices) but never used it.
        driver.find_element(By.CSS_SELECTOR, "ul")

        # Index layout noted by the original author:
        #   "box" elements:    [0]-[3] are round 1, [4]-[7] are round 2, ...
        #   "letter" elements: [1]-[4] are round 1, [5]-[8] are round 2, ...
        # One "letter" index per round: 1, 5, 9, ..., 61.
        letter_index = list(range(1, 64, 4))
        print(f'letter : {letter_index}')

        # NOTE(review): the goal indexes come from a separate urllib fetch, not
        # from the page loaded in the browser; confirm both serve the same markup.
        box = self._scrape_goal_boxes()
        print(f'Goal : {box}')

        # Pair the i-th goal box with the i-th letter index. zip() stops when
        # either list runs out rather than indexing past the end.
        for box_pos, letter_pos in zip(box, letter_index):
            # Look the elements up again on every step, as the original did.
            driver.find_elements(By.CLASS_NAME, "box")[box_pos].click()
            driver.find_elements(By.CLASS_NAME, "letter")[letter_pos].click()
            sleep(0.5)
        driver.save_screenshot('./test.png')

    def tearDown(self):
        """Close the browser started in setUp."""
        self.driver.quit()


if __name__ == '__main__':
    unittest.main()

'0x0B Web Hacking' 카테고리의 다른 글

SQLI 복습  (0) 2021.03.23
HTB - Freelancer  (0) 2021.02.12
sqli practice  (0) 2020.07.14
Hackerone CMS V2 Flag02  (0) 2020.06.29
HackerFactory 10번문제  (0) 2020.01.25