使用Selenium自动玩2048

2014年8月20日 01:24

前几天看summer师弟玩Selenium感觉挺有意思。便拿着前段时间风靡一时的游戏“2048”来练手,写了个简单的 AI。甚是欢乐!

Selenium

啥是selenium,简单地说——Selenium是一个用于Web应用程序测试的工具。Selenium测试直接运行在浏览器中,就像真正的用户在操作一样。支持的浏览器包括IE、Mozilla Firefox、Mozilla Suite等(来自 http://www.51testing.com/zhuanti/selenium.html)。

selenium这里就不多介绍了,细节请看 selenium 的 python api 文档

2048策略

2048这个游戏地球人都知道就不介绍了。主要介绍下我的AI。我的AI对于每一个方向的评估有三个方面

  1. 移动导致合并的得分 score:这个就是游戏本身定义的得分,如 4和4合并得8分,128和128合并得256分
  2. 移动后每一行每一列的单调性 monotone:对于每一行(每一列),如果 line[i] < line[i+1] 则 mon += line[i]+line[i+1],否则 mon -= line[i]+line[i+1];monotone 为所有行和列的 abs(mon) 之和
  3. 移动后相邻块值相同的情况 adjoin:任意两个相邻块的值相同,如cells[i][j]=cells[i+1][j],则 adjoin+=cells[i][j]

最后的估值 estimation = score + monotone * 0.3 + adjoin。对于上下左右四个方向取estimation最大的方向操作

这种方法还可以,运气好的话,可以得到2048

程序结构

程序的源代码如下:


from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import time

# Side length of the square game board; used as both the row and column count.
size = 4

class Estimator:
    """Heuristic that scores the board a candidate 2048 move would produce.

    The estimate is the merge score of the move plus, for every row and
    every column of the resulting board, ``abs(monotone) * 0.3 + adjoin``
    as computed by ``__estimate_line``.
    """

    def estimate(self, precells, postcells, action, score):
        """Return the heuristic value of a move; the caller picks the largest.

        precells  -- board before the move (not used by this estimator)
        postcells -- board after the move, a square list of rows
        action    -- name of the move (not used by this estimator)
        score     -- points gained by merges during the move
        """
        # Rows and columns come from the board itself rather than from a
        # module-level size constant.  They are visited as row i, column i,
        # row i+1, ... so the floating-point accumulation order is stable.
        for row, column in zip(postcells, zip(*postcells)):
            score += self.__estimate_line(row)
            score += self.__estimate_line(column)
        return score

    def __estimate_line(self, line):
        """Score one row or column: ``abs(monotone) * 0.3 + adjoin``."""
        monotone, adjoin = 0, 0
        for current, following in zip(line, line[1:]):
            pair_sum = following + current
            # A strictly increasing pair pushes the trend up, anything else
            # (including equal values) pushes it down; only the magnitude of
            # the net trend is rewarded.
            if following > current:
                monotone += pair_sum
            else:
                monotone -= pair_sum
            # Equal neighbours could merge on a later move.
            if following == current:
                adjoin += current
        return abs(monotone) * .3 + adjoin

class Auto2048:
    """Plays 2048 in a Firefox window driven through Selenium.

    Each call to ``AI`` reads the board from the page, simulates the four
    possible moves locally, asks the estimator to score each resulting board
    and sends the key of the best-scoring legal move to the page.
    """

    def __init__(self, url, estimator):
        """Open ``url`` in a new Firefox session.

        url       -- address of the 2048 page
        estimator -- object with an
                     ``estimate(precells, postcells, action, score)`` method
        """
        self.browser = webdriver.Firefox()
        self.browser.get(url)
        self.estimator = estimator

    def get_cells(self):
        """Read the tiles on the page into ``self.cells`` (0 means empty)."""
        tiles = self.browser.find_elements_by_class_name('tile')
        # Use the shared board size instead of a hard-coded 4.
        self.cells = [[0] * size for _ in range(size)]

        for tile in tiles:
            # The second class token carries the tile value after its first
            # '-', the third carries two 1-based indices after 'position'
            # (presumably "tile tile-<value> tile-position-<col>-<row>").
            attr = tile.get_attribute('class').split()
            value = int(attr[1].split('-')[1])
            position = attr[2].split('-')
            x = int(position[3]) - 1
            y = int(position[2]) - 1
            # Later tiles overwrite earlier ones at the same position.
            self.cells[x][y] = value

    def AI(self):
        """Make one move; return False when no direction changes the board."""
        self.get_cells()
        self.Print(self.cells)

        action, actionname = '', ''
        moveable = False
        max_estimation = None

        strategies = [
            {'fun': self.try_up, 'action': Keys.UP, 'name': 'Up'},
            {'fun': self.try_down, 'action': Keys.DOWN, 'name': 'Down'},
            {'fun': self.try_left, 'action': Keys.LEFT, 'name': 'Left'},
            {'fun': self.try_right, 'action': Keys.RIGHT, 'name': 'Right'},
        ]
        for strategy in strategies:
            result = strategy['fun']()
            estimation = self.estimator.estimate(
                self.cells, result['cells'], strategy['name'], result['score'])
            # Only legal moves compete; the strict '<' keeps the earlier
            # direction when two estimates tie.
            if result['moveable'] and (not moveable or max_estimation < estimation):
                action = strategy['action']
                max_estimation = estimation
                moveable = True
                actionname = strategy['name']

        if not moveable:
            return False

        self.browser.find_element_by_class_name('grid-container').send_keys(action)
        # Joined by hand so the output is the same under the print statement
        # and the print function.
        print(' '.join(['Action: ', actionname]))
        return True

    def move_left(self, cells):
        """Slide and merge every row of ``cells`` to the left, in place.

        Returns a dict with 'moveable' (whether anything changed), 'score'
        (sum of the values created by merges) and 'cells' (the same list
        object that was passed in, now modified).
        """
        def compact(row):
            """Pack the non-zero values of row to the front; report movement."""
            moved = False
            pre = 0
            for y in range(len(row)):
                if row[y]:
                    row[pre] = row[y]
                    if y != pre:
                        moved = True
                        row[y] = 0
                    pre += 1
            return moved

        moveable = False
        score = 0
        for row in cells:
            if compact(row):
                moveable = True
            # Merge equal neighbours left to right.  The right-hand cell is
            # zeroed, so a freshly merged value cannot merge again.
            for y in range(len(row) - 1):
                if row[y] and row[y] == row[y + 1]:
                    row[y] += row[y]
                    score += row[y]
                    row[y + 1] = 0
                    moveable = True
            # Close the gaps the merges left behind.
            if compact(row):
                moveable = True
        return {'moveable': moveable, 'score': score, 'cells': cells}

    def try_left(self):
        """Simulate a left move on a copy of ``self.cells``."""
        n = len(self.cells)
        cells = [[self.cells[i][j] for j in range(n)] for i in range(n)]
        return self.move_left(cells)

    def try_right(self):
        """Simulate a right move: mirror each row, move left, mirror back."""
        n = len(self.cells)
        cells = [[self.cells[i][n - 1 - j] for j in range(n)] for i in range(n)]
        result = self.move_left(cells)
        result['cells'] = [[result['cells'][i][n - 1 - j] for j in range(n)] for i in range(n)]
        return result

    def try_up(self):
        """Simulate an up move: transpose, move left, transpose back."""
        n = len(self.cells)
        cells = [[self.cells[j][i] for j in range(n)] for i in range(n)]
        result = self.move_left(cells)
        result['cells'] = [[result['cells'][j][i] for j in range(n)] for i in range(n)]
        return result

    def try_down(self):
        """Simulate a down move: read columns bottom-up, move left, map back."""
        n = len(self.cells)
        cells = [[self.cells[n - 1 - j][i] for j in range(n)] for i in range(n)]
        result = self.move_left(cells)
        result['cells'] = [[result['cells'][j][n - 1 - i] for j in range(n)] for i in range(n)]
        return result

    def __del__(self):
        # The browser attribute is missing when webdriver.Firefox() raised in
        # __init__; quit() is used so the whole driver session is ended
        # rather than only the current window.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            browser.quit()

    def Print(self, cells):
        """Print a blank line, then the board with 5-wide right-aligned cells."""
        print('')
        for row in cells:
            print(' '.join('%5d' % value for value in row))

if __name__ == '__main__':
    # Play the copy of the game stored next to this script; the public page
    # can be used instead by switching to the URL below.
    # url = "http://gabrielecirulli.github.io/2048/"
    local_page = os.path.abspath('2048/index.html')
    player = Auto2048('file://' + local_page, Estimator())
    # Keep moving until no direction is possible, pausing between moves.
    while True:
        if not player.AI():
            break
        time.sleep(0.2)
    # Leave the final board on screen for a while before the script ends.
    time.sleep(10)

源代码中有两个类

主逻辑类 Auto2048

使用 2048的url和估值类(AI逻辑)构造。测试过程中,用 wget 将 "http://gabrielecirulli.github.io/2048/" 的所有页面抓到本地分析(由于不懂js在网页中的工作原理,使用find_elements_by_class_name找当前cells的信息找了好久)

每一次操作调一次AI(),AI() 先获取当前页面的状态保存在 self.cells 中,然后对上下左右四个方向枚举,取估值最大的方向并使用send_keys进行操作。如果能操作则AI()返回True,否则返回False

估值类 Estimator

估值类只要实现 def estimate(self, precells, postcells, action, score): 估值方法即可。其中,precells为操作前状态,postcells为操作后状态,action为操作['Left', 'Right', 'Up', 'Down'],score为操作得分。返回值为估值estimation,值越大越好。

虽然我的估值方法运气好的话可以得到2048,但还是很粗糙的。你要有兴趣的话,可以写一个更好的Estimator得到更高的分。

源代码地址 https://github.com/xhSong/auto2048

这段时间的百度之星又错过了。老了,也做不动了,第二次题目a了一个,第二个一个dp的题目感觉方法对的,可硬是没有调过,看来真的是更不行了。

对于今年的Astar我就不吐槽了。查成绩有点麻烦,翻页翻到手抽筋,写一python脚本,把抓出来的成绩贴贴吧里面,结果被删贴,贴脚本也被删贴,郁闷!

还是把程序贴这里吧

#!/usr/bin/python
# coding=utf8
import sys
import urllib2
from re import sub

# Sample submission-log URL; no active code in this script reads it (the URL
# to crawl is taken from the command line).
problemurl="http://astar.baidu.com/index.php?r=home/detail&id=10"


def analysisPage(html, csvfile):
	"""Extract the result table of one page and append it to ``csvfile``.

	html    -- page source containing a single <tbody>...</tbody> table body
	csvfile -- writable file-like object; one comma-joined line is written
	           (and also printed) per record

	Every five consecutive whitespace-separated text tokens of the table
	body form one record, so a field that itself contains a space shifts
	the columns. Leftover tokens that do not fill a record are dropped.
	"""
	# Collapse all whitespace first so '.' in the next pattern spans the
	# whole document (it would otherwise stop at newlines).
	html = sub(r'[\s]+', ' ', html)
	tbody = sub(r'(^.*<tbody>)|(</tbody>.*$)', "", html)
	# Replace every tag by a space, then squeeze the spaces again.
	items = sub(r'[\s]+', ' ', sub(r'<[^<>]*>', ' ', tbody))
	items = items.strip().split(' ')
	# Floor division and print(...) with one argument behave the same on
	# Python 2 and keep the function usable on Python 3.
	for i in range(len(items) // 5):
		record = ",".join(items[i*5:i*5+5])
		print(record)
		csvfile.write(record + "\n")
	
def getResult(problemurl):
	"""Download every result page of ``problemurl`` into ``result.csv``.

	Pages are requested with an increasing ``BccSubmitLogs_page`` number
	until a page has no "next page" link or its next button is hidden.
	Network errors from ``urllib2.urlopen`` propagate to the caller; the
	CSV file is closed either way.
	"""
	# The downloaded page is a byte string. Searching it for a unicode
	# literal makes Python 2 decode the page as ASCII first, which fails on
	# any Chinese text, so the marker is encoded to bytes instead.
	# NOTE(review): assumes the site serves UTF-8 - confirm.
	next_marker = u"下一页".encode("utf-8")
	with open("result.csv", "w") as csvfile:
		csvfile.write('编号,用户名,语言,文件名,得分\n')
		pageid = 0
		while True:
			pageid += 1
			url = "%s&BccSubmitLogs_page=%d&ajax=projects-submit-logs" % (problemurl, pageid)
			html = urllib2.urlopen(url).read()
			analysisPage(html, csvfile)
			if html.find(next_marker) == -1 or html.find('class="next hidden"') != -1:
				break

if __name__ == '__main__':
	# Exactly one argument is expected: the URL of the problem's result list.
	if len(sys.argv) == 2:
		getResult(sys.argv[1])
	else:
		print("Usage: astar2012.py problem_url")
		exit(1)

 

总结一下,现在接触过下面这些python库了

  • PIL(Python Imaging Lib)/Image: 图像处理的库

  • cv/cv2:计算机视觉

  • numpy:NumPy is the fundamental package for scientific computing with Python

  • math:数学库

  • csv:csv文件处理

  • MySQLdb:链接mysql数据库

  • mlpy:机器学习库

  • matplotlib:It provides both a very quick way to visualize data from Python and publication-quality figures in many formats. 像matlab那样,可以画出很漂亮的图

  • M2Crypto、Crypto、pyecc:密码学库(hash,对称加密算法,非对称加密算法,签名认证等)

  • webpy:构建一个轻量级网站A minimalist web framework written in Python

  • urllib/urllib2:url访问,页面抓取等

  • re:正则表达式处理

  • os、sys:顾名思义,就是系统、文件的一些操作

  • ConfigParser:配置文件处理

  • Tkinter:图形界面库

python的各种库还是很强大的,o(∩∩)o...哈哈

由于课程作业的需求,我们要抓取京东商品的信息。在抓取商品的价格、评论人数以及评分的时候遇到一些麻烦。下面提供我的解决方案。

1. 商品价格:

京东上面的商品价格都是用图片显示的,不过庆幸的是,所有数字的字体、大小、颜色都是一样的。简单起见,直接把二维图片变成灰度图,取个反,映射成一维结构后进行切割和匹配。用了一个简单的匹配评估函数,只是对于数字不变的情况总是能得到正确的结果,如果字体、颜色、大小等变了,估计就得不到正确结果了。具体代码如下

#!/usr/bin/python
# coding=utf-8
import Image, sys

class PriceReco:
    """Reads a price from an image whose glyphs match a fixed template set.

    The image is converted to greyscale and inverted, every pixel column is
    summed, and each run of non-zero column sums is matched against the
    known column profiles of the supported characters.
    """

    # Fallback values used when the image cannot be loaded; a successful
    # load replaces them on the instance, so the shared list is never
    # modified in place.
    img_data = []
    size_x, size_y = 0, 0

    # Column-sum profile of each recognisable character, in the same order
    # as _TABLE_KEY.  Built once instead of on every getone() call.
    _TABLE_VALUE = [
        [189, 378, 945, 1512, 2079, 1701, 1701, 1134, 945, 378, 189],  # yen sign
        [567, 567],  # .
        [1323, 1701, 756, 378, 378, 378, 756, 1701, 1323],  # 0
        [378, 378, 2079, 2079, 189, 189],  # 1
        [567, 945, 756, 756, 756, 756, 945, 945, 567],  # 2
        [756, 1134, 378, 567, 567, 567, 1323, 1512, 756],  # 3
        [378, 378, 378, 378, 378, 378, 2079, 2079, 189, 189],  # 4
        [378, 1512, 1134, 567, 567, 567, 945, 1134, 756],  # 5
        [1134, 1512, 945, 756, 567, 567, 945, 1134, 567],  # 6
        [189, 189, 378, 756, 945, 945, 945, 756, 378],  # 7
        [756, 1512, 1323, 567, 567, 567, 1323, 1512, 756],  # 8
        [567, 1134, 945, 567, 567, 756, 945, 1512, 1134],  # 9
    ]
    _TABLE_KEY = ['¥', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    def __init__(self, filename):
        """Load ``filename`` and store its inverted greyscale pixels.

        On failure a message is printed and the instance keeps an empty
        image, so ``recognita`` returns an empty string.
        """
        try:
            img = Image.open(filename)
        except Exception:
            # Still best-effort, but KeyboardInterrupt and SystemExit are no
            # longer swallowed as they were by a bare except.
            print("%s %s" % (filename, "load error"))
            return
        self.size_x, self.size_y = img.size
        # Invert so the background sums to zero and ink is positive
        # (presumably dark glyphs on a white background).
        self.img_data = [255 - pixel for pixel in img.convert('L').getdata()]

    def getone(self, single):
        """Return the character whose profile best matches ``single``.

        single -- list of column sums of one glyph

        The distance to a template is the squared difference over the
        shared columns plus 10 times the squared length difference; 'N' is
        returned when even the best distance exceeds 100.
        """
        def distance(template):
            value = 10 * (len(template) - len(single)) ** 2
            for expected, actual in zip(template, single):
                value += (expected - actual) ** 2
            return value

        distances = [distance(template) for template in self._TABLE_VALUE]
        best = min(distances)
        if best > 100:
            return 'N'
        # index() returns the first minimum, so the earlier template wins a
        # tie.
        return self._TABLE_KEY[distances.index(best)]

    def recognita(self):
        """Split the image into glyphs and return the recognised string."""
        # Sum each pixel column; empty columns separate the glyphs.
        cnt = [
            sum(self.img_data[y * self.size_x + x] for y in range(self.size_y))
            for x in range(self.size_x)
        ]
        number = ""
        x = 0
        while x < self.size_x:
            if cnt[x]:
                single = []
                while x < self.size_x and cnt[x]:
                    single.append(cnt[x])
                    x += 1
                number += self.getone(single)
            # Either this column was empty or the glyph just ended on an
            # empty column (or the right edge); step past it.
            x += 1
        return number
        
if __name__ == '__main__':
    # Exactly one argument is required: the path of the price image.
    if len(sys.argv) != 2:
        print("Usage: price_reco image")
        # Stop here; continuing would index a missing sys.argv[1] or ignore
        # the extra arguments.
        sys.exit(1)
    price = PriceReco(sys.argv[1])
    print(price.recognita())

2. 评论人数以及商品评分

这个问题就简单多了,虽然京东302(重定向)了n次,稍加分析就可以知道,直接访问如下url就可以得到评论人数和商品评分了

http://club.360buy.com/ProductPageService.aspx?method=GetCommentSummaryBySkuId&referenceId=$id&callback=GetCommentSummaryBySkuId

其中 $id是商品的id,这个得到很简单。

 

于是就成功攻破京东,o(∩∩)o...哈哈!

 

01图

2012年11月03日 19:12

事情很多,就是不想干。之前看到贴吧上一些帖子用字符作画,感觉挺好玩的。于是试着用python写一个用0和1以及空格作画的程序。先上几个作品吧!

            01                                                              
           01  1111111111111                                                
       11111   1111111    11111                                             
     0 111111 111111111   11111 1                                           
   1  1111111111111111111111111   1                                         
  0 11111111111111111111111111111110                                        
 1111  111111111 1 111111111111111111                                       
 1 1    111111    1111111111111111111                                       
011111111111111111111111111    1111111                                      
111111111111111111111111111   111111                                        
  111111111111111111111111111111111                                         
  111111111111    11111111111111   111                                      
 11111111111111  11111111111  1                                             
111      111111 1111110          111111111                                  
1111   11111 1   11  1          11111111111    11111111111       1111       
 1111111111 1      0           1111111111111               1  11111111111   
 1  1               11         1111111111                    1111111111111  
                     11         111111                         11111111111  
       0             1 1           1                            1111111111  
      01 0            0 0         1                              11111111   
     01 01             1        0 11111                           1         
      11 1             1 0     1 111111           1111111          1        
   0    1               1 1   1    1              11111111         1        
        1               11   0                       01111          1       
                        11100       111 1111                        1       
                  1 10111 111                                        1      
                    00101  1                                         1      
                          1                                          1      
                          1                                          1      
                          1                                  1 1     1      
                          1                                1  1      011    
                          1                               1110      11      
                          1                                         1       
                           1                                       1       1
                            0                                     1         
                              11                               11           
                                  011                    1111               
                                           1111111101                       

源图片:

萌1

 

                               1111             1111                            
          00000000000001 11                            11100000000000001        
       1000000000000001                                     10000000000000      
       0000000000000                                          1000000000000     
      10000000001                                                10000000001    
      100000000                                                    100000001    
       000000                                                        100000     
        001                                                            000      
         1                                                              0       
       00000000000000000000001                      0000000000000000000000      
      100000001        1000000001               0000000001        1100000001    
     0000001              100000000           100000001               0000001   
    000000        11110       0000001        0000001      011011       100000   
   100000   111     001        0000001      0000001       1001     111  100001  
   00000011                 100000000       1000000011                 1000000  
  1000000              0000000000001         10000000000001             1000000 
 100000000        1000000000001                   1000000000001        000000001
 10000000001   100000000001                            00000000001    1000000000
 000000000000000000001                                     110000000000000000000
 000000000000000001                                            10000000000000000
10000000000001                                                     1000000000000
11  100000                                                             100001  1
 1                                                                             0
 11                                                                            1
  0                                                                           1 
  11                                                                         11 
                                                                             1  
    11                                                                      1   
      0                                                                    0    
       11                                                                 1     
          1                                                            10       
           10                                                         1         
              11                                                   1            
                 111                                            11              
                      1111                                111                   
                              1000011111111111111111100000                      

源图片:

萌2

原理很简单,也没有什么技术含量,就是把图片变换一下,定两个0和1的阈值,然后就ok了。

 

#!/usr/bin/python
# coding=utf-8
from Tkinter import *
from FileDialog import *
import Image

# Image path read by gen(); getFileName() replaces it with the user's choice.
file_name = 'a.jpg'

def getFileName():
    """Ask the user for an image file and remember it in ``file_name``.

    The global ``file_name`` is only updated when a file was chosen and
    could be opened; otherwise the previous value is kept so gen() still
    has something to work with.
    """
    global file_name
    chosen = LoadFileDialog(frame).go()
    if not chosen:
        # The dialog returned nothing (e.g. it was cancelled).
        return
    try:
        img = Image.open(chosen)
    except Exception:
        print('openfile error')
        # Without this return the code below would hit an undefined img.
        return
    file_name = chosen
    # NOTE(review): this only stores the PIL image on the label object; it
    # probably does not display it (a Tk label expects a PhotoImage passed
    # through its image option) - confirm.
    show_img.image = img
    show_img.pack()


def gen():
    """Render the image named by ``file_name`` as 0/1/space text in genText.

    The image is converted to greyscale and resized to 80 columns. Pixels
    above 170 become spaces, pixels above 85 become '1' and the rest '0'.
    Only the bounding box of the non-space pixels is written out.
    """
    try:
        img_o = Image.open(file_name)
    except Exception:
        print('openfile error')
        # Nothing to render; continuing would hit an undefined img_o.
        return
    img_L = img_o.convert('L')
    size_x, size_y = img_o.size
    # Scale to 80 columns and halve the row count (presumably because a
    # character cell is about twice as tall as it is wide).  Keep at least
    # one row so very wide images do not produce a zero-height resize.
    size_y = max(1, size_y * 80 // size_x // 2)
    size_x = 80
    data = list(img_L.resize((size_x, size_y)).getdata())

    # Bounding box of every pixel that will not be rendered as a space.
    min_x, min_y = size_x, size_y
    max_x, max_y = 0, 0
    for i in range(0, size_y):
        for j in range(0, size_x):
            if data[i * size_x + j] <= 170:
                min_x, min_y = min(min_x, j), min(min_y, i)
                max_x, max_y = max(max_x, j), max(max_y, i)

    genText.delete('1.0', 'end')
    # When no pixel is dark enough the box is empty and nothing is written.
    for i in range(min_y, max_y + 1):
        chars = []
        for j in range(min_x, max_x + 1):
            level = data[i * size_x + j]
            if level > 170:
                chars.append(' ')
            elif level > 85:
                chars.append('1')
            else:
                chars.append('0')
        genText.insert('end', ''.join(chars) + '\n')



# Build the window: a button row, the text area that receives the generated
# art, and a label intended for the chosen image.  The widget names are
# module globals because getFileName() and gen() refer to them.
if __name__ == "__main__":
     
    mainWindow = Tk()
    # NOTE(review): title() is called without an argument, so it looks like
    # no window title is actually set here - confirm.
    mainWindow.title()
    #mainWindow.geometry('640x480+0+0')
    
    frame = Frame(mainWindow)
    frame.pack()
    
    # "Open file" button: lets the user pick the source image.
    fileBotton = Button(frame, text =u"打开文件", command = getFileName)
    fileBotton.pack(side = LEFT)
    
    # "Generate" button: renders the current image into genText.
    genBotton = Button(frame, text = u"生成", command = gen)
    genBotton.pack(side = LEFT)
    
    # Output area filled by gen().
    genText = Text(frame, height = 40)
    genText.pack(fill = BOTH, padx = 10, pady = 10)
    
    # Label that getFileName() attaches the opened image to.
    show_img = Label(frame)
    show_img.pack()
    mainWindow.mainloop()

一个弱爆的google 翻译客户端

2012年10月10日 02:06

在ubuntu下每次想翻译一个词或者一句话都要开浏览器上google翻译,感觉挺麻烦。于是自己写了一个python小脚本(googleTranslate.py),简化这个流程。

 

#!/usr/bin/python
# coding=utf-8

import urllib,urllib2
import sys
if __name__ == '__main__':
    # Usage: googleTranslate.py [en|ch] text to translate
    # An optional leading 'en' or 'ch' selects the target language; English
    # is the default.
    if len(sys.argv) < 2:
        exit()
    language_flags = {'en': 'en', 'ch': 'zh-CN'}
    words = sys.argv[1:]
    tolanguage = 'en'
    if words[0] in language_flags:
        tolanguage = language_flags[words[0]]
        words = words[1:]
    # Join every word with a single space.  Building the text piecewise
    # glued the first and second word together when no flag was given.
    text = " ".join(words)
    values = {'client':'t', 'text': text, 'hl':'en', 'sl':'auto', 'tl':tolanguage, 'ie':'UTF-8', 'oe':'UTF-8', 'multires':'1', 'otf':'2', 'ssel':'0', 'tsel':'0', 'sc':'1'}
    url = 'http://translate.google.cn/translate_a/t'
    request = urllib2.Request(url, urllib.urlencode(values))
    # A browser-like User-Agent is sent with the request.
    request.add_header('User-Agent', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4")
    response = urllib2.urlopen(request)
    data = response.read()
    # Take everything between the first four characters and the first "]]",
    # split it into fragments and keep the first quoted field of each
    # (presumably the translated sentence - verify against a real response).
    frags = data[4:data.find("]]")].split("\"],[\"")
    totext = "".join(frag.split("\",\"")[0] for frag in frags)
    print(totext)

将这个脚本放在某个目录下,此处以/path/to为例子。使用以下命令添加脚本可执行属性

chmod +x googleTranslate.py

然后在.bashrc最后添加一行

alias t='/path/to/googleTranslate.py'

于是乎在任何界面下想要翻译词句时,直接ctrl+alt+t(打开终端),输入

t 想要翻译的词句

即可。默认是将词句翻译成英文,如果需要将其他语言翻译成中文,执行

t ch 想要翻译的词句

即可。方便快捷,o(∩∩)o...哈哈

 

而后觉得写成一个小软件还是很靠谱的,于是又折腾了下python Tkinter,把上面的代码加了一个GUI的外壳。废话少说,上图

GUI的代码也不复杂,顺便也贴了吧,供大家交流。BUG比较多,有待后期改进,谢谢阅读!

#!/usr/bin/python
# coding=utf-8

import urllib, urllib2
from Tkinter import *
from ttk import Combobox
 
def translate():
	"""Translate the text in ``startText`` and show the result in ``toText``.

	The target language is looked up from the current combobox selection.
	The raw server response is also printed to standard output.
	"""
	target = languageMap[option.get()]
	source_text = startText.get('1.0', 'end').encode('utf-8')

	params = {'client':'t', 'text': source_text, 'hl':'en', 'sl':'auto', 'tl':target, 'ie':'UTF-8', 'oe':'UTF-8', 'multires':'1', 'otf':'2', 'ssel':'0', 'tsel':'0', 'sc':'1'}
	request = urllib2.Request('http://translate.google.cn/translate_a/t', urllib.urlencode(params))
	request.add_header('User-Agent', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4")
	data = urllib2.urlopen(request).read()
	print(data)

	# Keep the part between the first four characters and the first "]]",
	# then take the first quoted field of every fragment (presumably the
	# translated sentence - verify against a real response).
	body = data[4:data.find("]]")]
	pieces = [frag.split("\",\"")[0] for frag in body.split("\"],[\"")]
	translated = "".join(pieces)

	# Replace the previous result, turning escaped "\n" into line breaks.
	toText.delete('1.0', 'end')
	toText.insert('1.0', translated.replace("\\n", "\n"))

# Build the window: a row with the target-language selector and the
# translate button, then the input box and the output box.  languageMap,
# option, startText and toText are module globals because translate() reads
# them.
if __name__ == "__main__":
	
	mainWindow = Tk()
	mainWindow.title(u"Google 翻译 by hustsxh@gmail.com")
	#mainWindow.geometry('640x480+0+0')
	
	frame = Frame(mainWindow)
	frame.pack()
	
	# Caption in front of the language selector ("translate into:").
	Label(frame, text = u'翻译成:').pack(side = LEFT)
	
	# Display name -> language code passed to the service as 'tl'.
	languageMap = {'English': 'en', u'中文简体': 'zh-CN', u'中文繁體': 'zh-TW', u'日本語': 'ja', u'한국의': 'ko', u'Deutsch': 'de', u'русский': 'ru', u'française': 'fr'}
	defaultLanguage = StringVar(frame, 'English')
	option = Combobox(frame, text = defaultLanguage, values = languageMap.keys())
	option.pack(side = LEFT)
	
	# "Translate" button: runs translate() on the current input.
	transBotton = Button(frame, text =u"翻译", command = translate)
	transBotton.pack(side = LEFT)
	
	# Input box read by translate().
	startText = Text(mainWindow, height = 15)
	startText.pack(fill = BOTH, padx = 10, pady = 10)
	
	# Output box filled by translate().
	toText = Text(mainWindow, height = 15)
	toText.pack(fill = BOTH, padx = 10, pady = 10)
	
	mainWindow.mainloop()