2019-03-30 00:53:42    2019-03-30 00:53:42   

python asyncio 协程 coroutine
### 介绍 终端使用的是asyncio,单线程支持并发操作。 聊天终端,暂时仅提供登陆/注销/发送在线消息/发送离线消息/帮助功能(未完待续) 登陆命令:auth userid password (userid为用户账号,用户数据存储在mysql中) 注销命令:logout 发送消息:msg userid message (支持在线和离线消息,离线消息,当对方不在线时,存储到 redis中,等用户上线之后再推送给该用户) 帮助命令:help/? ### 环境 系统:`CentOS7` 编程语言:`Python` 语言解释器:`Cpython-3.7.2` 数据库:`mysql-5.7` `redis-5.0.3` 缓存:`redis-5.0.3` ### 表结构 #### user表 ```sql CREATE TABLE `user` ( `id` int(11) NOT NULL AUTO_INCREMENT, `username` varchar(255) NOT NULL, `password` varchar(255) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=91 DEFAULT CHARSET=utf8; ``` ### 服务器端代码 chat_server.py ```py #coding:utf-8 ###################### # 作者: SheYinsong # # 时间: 2019-03-30 # ###################### import asyncio import aiomysql import aioredis import sys import struct import ssl import time import logging SERVER_ADDRESS = ('0.0.0.0',10000) logging.basicConfig( level=logging.DEBUG, format='%(name)s: %(message)s', stream=sys.stderr, ) log=logging.getLogger('main') #LOOP loop=asyncio.get_event_loop() #TLS配置 CERTFILE='ssl/server.crt' KEYFILE='ssl/server.key' #在线用户userid到读写Stream映射 auth_userid_map_wstream={} auth_wstream_map_userid={} #mysql 配置 db_config={ 'host':'localhost', 'port':3306, 'user':'root', 'password':'xxxxxxxx', 'db':'chat', 'charset':'utf8' } #SQL 模板 QUERY_AUTH_SQL='select password from user where id=%s' QUERY_USERID_IS_EXISTS_SQL='select id from user where id=%s' QUERY_USERNAME_OF_USERID_SQL='select username from user where id=%s' #redis配置 #redis_host='localhost' #redis_port=6379 redis_unix_socket='/var/run/redis/redis.sock' #redis KEY模板 KV_EXISTS_USERID=u'kv_exists_userid:userid:%s' KV_USERID_GET_USERNAME=u'kv_userid_get_username:userid:%s' LIST_USERMESSAGES_OF_USERID=u'list_usermessages_of_userid:userid:%s' #支持的命令列表 cmd_list=['auth','msg','logout'] #命令帮助 cmd_help=""" =============================================== | | | 命令使用说明: | | | =============================================== |命令 参数 参数 (说明) | =============================================== |auth UID PASSWORD (登录系统) | |msg UID MESSAGE (给用户发送消息) | =============================================== |命令 (说明) | =============================================== |logout (退出) | =============================================== """ async def get_custom_time_string(t_format="%Y-%m-%d %X"): """获取指定格式的时间字符串 默认格式为"%Y-%m-%d %X" """ return time.strftime(t_format) async def get_db_pool(): """获取mysql连接池""" pool = await aiomysql.create_pool(**db_config) return pool async def get_redis_pool(): """获取redis连接池""" pool = await aioredis.create_pool( redis_unix_socket, minsize=5, maxsize=10) return pool async def db_query(sql): """mysql执行查询操作""" pool = await get_db_pool() results=() async with pool.acquire() as conn: async with conn.cursor() as cur: log.debug(f'查询数据库,SQL->[ {sql} ]') await cur.execute(sql) results = await cur.fetchall() pool.close() await pool.wait_closed() return results async def redis_query(cmd,*args): """redis执行查询操作""" pool=await get_redis_pool() with await pool as conn: results =await conn.execute(cmd,*args) pool.close() await pool.wait_closed() return results async def redis_dml(cmd,*args): """redis执行增删改操作""" pool=await get_redis_pool() with await pool as conn: await conn.execute(cmd,*args) pool.close() await pool.wait_closed() async def get_username_of_userid(userid): """获取userid对应的用户名""" if await redis_query('exists',KV_USERID_GET_USERNAME % userid): username=await redis_query('get',KV_USERID_GET_USERNAME % userid) return username.decode('utf-8') else: res=await db_query(QUERY_USERNAME_OF_USERID_SQL % userid) if res: username=res[0][0] await redis_dml('set',KV_USERID_GET_USERNAME % userid,username) return username return '' async def get_userid_of_wstream(wstream): """通过writestream获取userid""" try: userid=auth_wstream_map_userid[wstream] return userid except KeyError: return -1 async def get_wstream_of_userid(userid): """通过userid获取writestream""" try: write_stream=auth_userid_map_wstream[userid] return write_stream except KeyError: return '' async def write_data(writer,data): """发送数据""" b_data=bytes(data,encoding='utf-8') b_datasize=struct.pack('H',len(b_data)) try: writer.write(b_datasize+b_data) await writer.drain() return True except: await clean_wstream(writer) return False async def read_data(reader): """读取数据""" try: length=struct.unpack('H',await reader.read(2))[0] b_data=await reader.read(int(length)) return b_data.decode('utf-8') except Exception as e: log.debug(f'读取客户端数据异常:[{e}]') return '' async def clean_wstream(writer): """清理客户端登陆信息和关闭stream""" userid=await get_userid_of_wstream(writer) if userid != -1: try: del auth_userid_map_wstream[userid] except: pass try: del auth_wstream_map_userid[writer] except: pass writer.close() async def user_auth(*args): """用户认证""" userid=int(args[0]) pwd=args[1] sql=QUERY_AUTH_SQL % (userid) res=await db_query(sql) if res: if res[0][0] == pwd: return True return False async def userid_is_exists(userid): """"判断userid是否存在""" if await redis_query('exists',KV_EXISTS_USERID % userid): return True else: res=await db_query(QUERY_USERID_IS_EXISTS_SQL % userid) if res: await redis_dml('set',KV_EXISTS_USERID % userid,'') return True return False async def userid_is_online(userid): """判断用户是否在线""" try: writer=auth_userid_map_wstream[userid] return True except KeyError: return False async def offline_userid(operator_type,*args): """下线用户""" userid=int(args[0]) try: writer=auth_userid_map_wstream[userid] if operator_type == 0: msg='有用户登录您的账号,您已被挤下线!' elif operator_type == 1: msg='您已退出登录!' else: msg='您已退出系统!' await write_data(writer,msg) except Exception as e: log.debug(e) finally: try: del auth_userid_map_wstream[userid] except: log.debug('auth_userid_map_wstream 删除失败') try: del auth_wstream_map_userid[writer] except: log.debug('auth_wstream_map_userid 删除失败') async def send_user_msg(writer,*args): """发送用户消息""" to_userid=int(args[0]) from_userid=auth_wstream_map_userid[writer] content=' '.join(args[1:]) if not await userid_is_exists(to_userid): msg='系统不存在该userid!' await write_data(writer,msg) return from_username=await get_username_of_userid(from_userid) to_username=await get_username_of_userid(to_userid) custom_time=await get_custom_time_string() send_content=f'{custom_time}\n[ {from_username}|UID:{from_userid} ]:{content}' if to_userid == from_userid: send_content=f'{custom_time}\n[ {from_username}|UID:{from_userid} ]:{content}' await write_data(writer,send_content) return if await userid_is_online(to_userid): to_writer=auth_userid_map_wstream[to_userid] if await write_data(to_writer,send_content): await write_data(writer,send_content) return #用户不在线,发送离线消息 print("用户不在线,发送离线消息") await redis_dml('rpush',LIST_USERMESSAGES_OF_USERID % to_userid,send_content) await write_data(writer,f'[离线消息]\n{send_content}') async def user_is_auth(writer): """判断用户是否认证""" if writer in auth_wstream_map_userid.keys(): return True return False async def get_offline_msg_of_userid(userid): "获取userid对应的离线消息" return await redis_query('lrange',LIST_USERMESSAGES_OF_USERID % userid,0,-1) async def push_offline_msg(writer,*args): """推送离线消息""" userid=int(args[0]) user_offline_messages_list=await get_offline_msg_of_userid(userid) for user_offline_message in user_offline_messages_list: await write_data(writer,user_offline_message.decode('utf-8')) await redis_query('del',LIST_USERMESSAGES_OF_USERID % userid) async def update_auth(writer,*args): """更新auth_userid_map_wstream和auth_wstream_map_userid""" userid=int(args[0]) auth_userid_map_wstream[userid]=writer auth_wstream_map_userid[writer]=userid async def check_args_validity(writer,cmd,*args): if cmd == 'auth': if len(args) != 2: msg='提示:认证格式: [ auth userid pwd ]' await write_data(writer,msg) return False try: userid=int(args[0]) except: msg='userid为整数!' await write_data(writer,msg) return False elif cmd == 'msg': if len(args) < 2: msg='提示:消息格式: [ msg userid content ]' await write_data(writer,msg) return False try: to_userid=int(args[0]) except: msg='userid为整数!' await write_data(writer,msg) return False return True async def handler_client(reader,writer): """处理客户端连接""" address=writer.get_extra_info('peername') log=logging.getLogger('echo_{}_{}'.format(*address)) log.debug('收到新连接') args_ok=True cmd_ok=True while True: if not await user_is_auth(writer) and cmd_ok and args_ok: msg=u'请登陆!' await write_data(writer,msg) data=await read_data(reader) if not data: break cmd=data.split(' ')[0] args=data.split(' ')[1:] if cmd == '?' or cmd == 'help': await write_data(writer,f'{cmd_help}') cmd_ok=False continue elif cmd not in cmd_list: await write_data(writer,'输入的命令不支持,请使用 help 或 ? 查看帮助!') cmd_ok=False continue cmd_ok=True if not await user_is_auth(writer): if cmd != 'auth': continue if not await check_args_validity(writer,cmd,*args): args_ok=False continue args_ok=True if await user_auth(*args): await offline_userid(0,*args) await update_auth(writer,*args) msg='认证成功!' await write_data(writer,msg) await push_offline_msg(writer,*args) else: msg='认证失败!' await write_data(writer,msg) else: if not await check_args_validity(writer,cmd,*args): args_ok=False continue args_ok=True if cmd == 'msg': await send_user_msg(writer,*args) elif cmd == 'logout': userid=await get_userid_of_wstream(writer) await offline_userid(1,userid) elif cmd == 'auth': await write_data(writer,'您已登录!') context=ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) context.load_cert_chain(certfile=CERTFILE,keyfile=KEYFILE) factory=asyncio.start_server(handler_client,*SERVER_ADDRESS,ssl=context) server=loop.run_until_complete(factory) log.debug('starting up on {} port {}'.format(*SERVER_ADDRESS)) try: loop.run_forever() except KeyboardInterrupt: pass finally: log.debug('关闭服务器.') server.close() loop.run_until_complete(server.wait_closed()) log.debug('关闭事件循环LOOP.') loop.close() ``` ### 客户端 chat_client.py ```py #coding:utf-8 ###################### # 作者: SheYinsong # # 时间: 2019-03-30 # ###################### import asyncio from aioconsole import ainput import concurrent.futures import sys import ssl import time import struct import logging #日志配置 logging.basicConfig( level=logging.DEBUG, format='%(name)s: %(message)s', stream=sys.stderr, ) log = logging.getLogger('main') #TLS配置 CERTFILE='ssl/server.crt' #chat服务器配置 SERVER_ADDRESS = ('chat.unotes.co',10000) #证书使用的chat.unotes.co域名,根据实际证书域名填写 loop = asyncio.get_event_loop() async def write_data(writer,data): """发送数据""" b_data=bytes(data,encoding='utf-8') b_datasize=struct.pack('H',len(b_data)) try: writer.write(b_datasize+b_data) await writer.drain() return True except: return False async def read_data(reader): """读取数据""" try: length=struct.unpack('H',await reader.read(2))[0] b_data=await reader.read(int(length)) return b_data.decode('utf-8') except Exception as e: log.debug(f'读取客户端数据异常:[{e}]') return '' async def read(reader,once=False): """循环读取数据""" while True: data=await read_data(reader) if data: if once: print(f'{data}\n',end='') break print(f'\x08\x08\x08{data}\n>>>',end='') async def chat_client(address): """聊天客户端""" log = logging.getLogger('chat_client') log.debug('connecting to {} port {}'.format(*address)) context=ssl.create_default_context(ssl.Purpose.SERVER_AUTH) context.load_verify_locations(CERTFILE) reader, writer = await asyncio.open_connection(*address,ssl=context) await read(reader,True) read_task=asyncio.create_task(read(reader)) while True: data=await ainput('>>>') if not data: data='?' await write_data(writer,data) try: loop.run_until_complete( chat_client(SERVER_ADDRESS) ) except: pass finally: log.debug('closing event loop') loop.close() ``` ### 运行服务器 ```bash $ python chat_server_asyncio_tls.py ``` ``` asyncio: Using selector: EpollSelector main: starting up on 0.0.0.0 port 10000 ``` ### 运行客户端1 #### 用户张三 ```bash python chat_client_asyncio_tls.py ``` ``` 请登陆! >>>auth 10001 123456 认证成功! >>>msg 10002 你好,我是张三 [离线消息] 2019-03-29 17:00:24 [ 张三|UID:10001 ]:你好,我是张三 >>> ``` ### 运行客户端2 #### 用户李四 ```bash python chat_client_asyncio_tls.py ``` ``` 请登陆! >>>auth 10002 123456 认证成功! 2019-03-29 17:00:24 [ 张三|UID:10001 ]:你好,我是张三 >>>msg 10001 你好,张三,我收到您的消息 2019-03-29 17:02:52 [ 李四|UID:10002 ]:你好,张三,我收到您的消息 >>> ``` #### 查看客户端1 ### 用户张三 ```bash $ python chat_client_asyncio_tls.py ``` ``` 请登陆! >>>auth 10001 123456 认证成功! >>>msg 10002 你好,我是张三 [离线消息] 2019-03-29 17:00:24 [ 张三|UID:10001 ]:你好,我是张三 2019-03-29 17:02:52 [ 李四|UID:10002 ]:你好,张三,我收到您的消息 ```
阅读 144 评论 0 收藏 0
阅读 144
评论 0
收藏 0

   2019-03-12 14:51:06    2019-06-17 10:27:42   

对称加密 非对称加密 摘要算法
### 加密算法 说明:这里为了方便介绍,引入发送方A和接收方B       #### 非对称加密算法(被加密数据的安全性) `场景`:加密(RSA/DH/Elgamal/ECC)少量数据、数字签名(RSA/DSA/Elgamal) `算法`:DH RSA DSA ECC(椭圆曲线算法) Elgamal Merkle-Hellman(背包算法) Miller-Rabin(素数测试算法) 加密/解密场景 ```bash 发送方A 加密:DH/RSA/DSA公钥(接收方B)+对称加密的KEY-->密文KEY 接收方B 解密:DH/RSA/DSA私钥(接收方B)+密文KEY-->对称加密的KEY ``` 数字签名/验签场景 ```bash 发送方A 签名: 1.(SHA1/MD5+明文)->摘要1 2.摘要1+(RSA/Elgamal)私钥(发送方A)-->数字签名 接收方B 验签: 1. RSA公钥(发送方A)+数字签名-->摘要1(解密出来的) 2. SHA1/MD5+明文->摘要2(接收方B自己计算的) 3. 摘要1==摘要2 ?"成功":"失败" (对比摘要1和摘要2的值是否一致) ```    #### 对称加密算法 `场景`:适合加密大数据 `算法`:AES DES 3DES PBE RC5 Blowfish 加密/解密场景 ```bash 加密过程:明文+AES/DES+对称加密的KEY->密文 解密过程:密文+AES/DES+对称加密的KEY->明文 ``` AES加密算法的Counter模式 - ECB(电子密码本模式) - CTR(计数器模式) - GCM(CTR和GMAC结合)    #### AEAD加密方式(Authenticated Encryption with Associated Data (AEAD) `介绍`:一种同时具备保密性,完整性和可认证性的加密形式 AEAD - EtM方式:加密 then MAC - MtE方式:MAC then 加密 - E&M方式:加密 and MAC 真正的AEAD - GCM=(AES-CTR模式+GMAC结合) - ChaCha20-IETF-Poly1305 - XChaCha20-IETF-Poly1305    #### 摘要算法 `算法` MD5 SHA1 MAC GMAC ```bash 摘要操作:MD5/SHA1+文本->固定长度的HASH值 ```
阅读 42 评论 0 收藏 0
阅读 42
评论 0
收藏 0

   2019-01-21 15:19:07    2019-01-21 15:19:07   

python
#### 分析 微信认证名中一般都会附带地区名,通过数据库的省市县名和微信名做存在判断(```注意:此方法存在一定概率的误判```) #### 准备 需要省、市、县数据表的数据,需要的话请联系博主 #### 步骤 1.判断对应的省名是否在微信认证名中,如果在,则匹配出数据,并且判断如果是直辖市,获取对应的市级名字,id 2.如果省没匹配成功,则匹配市级数据的名字,如果成功,则获取对应的省名及对应省id 3.如果省市都没匹配成功,则匹配县区级数据的名字,如果成功,则获取对应的省市名及对应省市id ##### 数据表结构 ```sql CREATE TABLE `province` ( `id` bigint(19) NOT NULL AUTO_INCREMENT, `name` varchar(32) NOT NULL COMMENT '省份名称', `short_name` varchar(32) DEFAULT NULL COMMENT '省份简称', `remark` varchar(255) DEFAULT NULL COMMENT '备注', `created_at` datetime NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=36 DEFAULT CHARSET=utf8; CREATE TABLE `city` ( `id` bigint(19) NOT NULL AUTO_INCREMENT COMMENT '标识', `name` varchar(32) NOT NULL COMMENT '城市名称', `short_name` varchar(32) DEFAULT NULL COMMENT '城市简称', `province_id` bigint(19) NOT NULL COMMENT '所属省份标识', `level` int(10) NOT NULL COMMENT '城市等级(0未知,1:一线,2:二线,3:三线,4:四线)', `remark` varchar(255) DEFAULT NULL COMMENT '备注', `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', PRIMARY KEY (`id`), UNIQUE KEY `name` (`name`,`province_id`) USING BTREE ) ENGINE=InnoDB AUTO_INCREMENT=7471 DEFAULT CHARSET=utf8; CREATE TABLE `district` ( `id` bigint(19) NOT NULL AUTO_INCREMENT COMMENT '标识', `name` varchar(32) NOT NULL COMMENT '区县名称', `short_name` varchar(32) DEFAULT NULL COMMENT '区县简称', `city_id` bigint(19) NOT NULL COMMENT '所属城市标识', `remark` varchar(255) DEFAULT NULL COMMENT '备注', `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', PRIMARY KEY (`id`), UNIQUE KEY `name` (`name`,`city_id`) USING BTREE ) ENGINE=InnoDB AUTO_INCREMENT=10069 DEFAULT CHARSET=utf8; ``` ##### python脚本 ```python vim get_loc.py ``` ```python #coding:utf-8 import pymysql db_host="xx.xx.xx.xx" db_user="root" db_pass="xxxxx" db_port=3306 db_name="xxxx" #微信认证名 file=open("weixin_auth.txt",'r',encoding="utf-8") def execute_query_sql(sql): #循环读取数据库状态是0的关键字100个 db= pymysql.connect(host=db_host,port=db_port,user=db_user, passwd=db_pass, db=db_name) # 使用 cursor() 方法创建一个游标对象 cursor cursor = db.cursor() #执行sql cursor.execute(sql) results=cursor.fetchall() # 关闭数据库连接 db.close() return results n=1 for f in file.readlines(): #获取省 pro_sql='select id,name from province;' result=execute_query_sql(pro_sql) is_has_pro=False pro_id=-1 city_id=-1 district_id=-1 pro_name="" for result_entry in result: if result_entry[1][0:2] in f: is_has_pro=True pro_name=result_entry[1] pro_id=result_entry[0] #这里写死id if pro_name=="北京市": city_id=7151 if pro_name=="上海市": city_id=7122 if pro_name=="天津市": city_id=7182 if pro_name=="重庆市": city_id=7430 break #获取市 is_has_city=False city_name="" if not is_has_pro: city_sql='select id,name from city where province_id>0;' result=execute_query_sql(city_sql) for result_entry in result: if result_entry[1].replace("市","").replace("县","") in f: is_has_city=True city_name=result_entry[1] city_id=result_entry[0] #获取省id pro_id_sql='select province_id from city where id='+str(city_id)+';' result=execute_query_sql(pro_id_sql) pro_id=result[0][0] break #获取区县 is_has_district=False district_name="" if not is_has_city and not is_has_pro: district_sql='select id,name from district where city_id>0;' result=execute_query_sql(district_sql) for result_entry in result: if result_entry[1].replace("市","").replace("县","") in f: is_has_district=True district_name=result_entry[1] district_id=result_entry[0] #获取市id city_id_sql='select city_id from district where id='+str(district_id)+';' result=execute_query_sql(city_id_sql) city_id=result[0][0] #获取省id pro_id_sql='select province_id from city where id='+str(city_id)+';' result=execute_query_sql(pro_id_sql) pro_id=result[0][0] break #打印对应的id #获取名字 if pro_id>0: sql='select name from province where id='+str(pro_id)+';' result=execute_query_sql(sql) pro_name=result[0][0] if city_id>0: sql='select name from city where id='+str(city_id)+';' result=execute_query_sql(sql) city_name=result[0][0] if district_id >0: sql='select name from district where id='+str(district_id)+';' result=execute_query_sql(sql) district_name=result[0][0] n+=1 print("当前执行到第"+str(n)+"行") print(f.strip()+","+str(pro_id)+","+str(city_id)+","+str(district_id)+","+pro_name+","+city_name+","+district_name+"\n") #把数据保存在文件中 file2=open("weixin_auth_loc.txt",'a',encoding="utf-8") file2.write(f.strip()+","+str(pro_id)+","+str(city_id)+","+str(district_id)+","+pro_name+","+city_name+","+district_name+"\n") file2.close() ```
阅读 39 评论 0 收藏 0
阅读 39
评论 0
收藏 0

   2019-01-16 12:00:16    2019-01-16 12:00:16   

python selenium 爬虫
### 本文讲解通过python selenium firefox mysql的方式爬取搜狗微信公众号数据 `说明:搜狗微信的反爬虫,scrapy框架爬取易被检测,使用selenium的方式(缺点:慢。优点:不易被检测到)。` 安装相关软件教程参考:https://ynotes.cn/blog/article_detail/158 #### 流程: 1.脚本循环查询关键字表(table keys)中关键字类型字段(column type)所对应的关键字字段(column keyword)前100条数据 2.通过获取关键字循环去搜狗微信去搜索 3.爬取搜狗搜索出来的微信公众号 4.判断页面是否有分页,有则循环爬取。爬取完一个页面,更新爬取页面数字段(column page_num),所有页面更新关键字表的状态字段(column status[0:表示未爬取,1:表示已爬取]) 5.对爬取出来的数据插入到微信公众号数据表(weixin_data)(建相关数据表) 6.更新关键字表的状态为已爬取状态 #### 数据表结构 ```sql CREATE TABLE `keys` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, `keyword` varchar(255) DEFAULT NULL, `page_num` int(11) DEFAULT '0', `status` int(11) DEFAULT '0' COMMENT '0 未搜索 1 已搜索 99 丢弃', `type` varchar(255) DEFAULT NULL, `is_drop` int(11) NOT NULL DEFAULT '0', PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=119750 DEFAULT CHARSET=utf8; CREATE TABLE `weixin_data` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, `key_id` int(255) DEFAULT NULL, `weixin_name` varchar(255) DEFAULT NULL, `weixin_account` varchar(255) DEFAULT NULL, `weixin_auth_info` varchar(255) DEFAULT NULL, `is_auth` int(11) DEFAULT NULL, `describe` varchar(6000) DEFAULT NULL, `img_url` varchar(255) DEFAULT NULL, `loc_info` varchar(255) DEFAULT NULL, `privince` varchar(255) DEFAULT NULL, `city` varchar(255) DEFAULT NULL, `district` varchar(255) DEFAULT NULL, `weixin_type` varchar(255) DEFAULT NULL, `other` varchar(255) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `weixin_account` (`weixin_account`) ) ENGINE=InnoDB AUTO_INCREMENT=139746 DEFAULT CHARSET=utf8; ``` #### 爬虫脚本 scrapy_sogou.py ```python #coding=utf-8 from selenium import webdriver import time from selenium.common.exceptions import NoSuchElementException,TimeoutException from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC import pymysql import random 指定页面 #参数指定了缓存文件的路径,方便爬取需要登录的网站 profile = webdriver.FirefoxProfile(r'C:\Users\Administrator.GZLX-20180416SV\AppData\Roaming\Mozilla\Firefox\Profiles\yn80ouvt.default') #如果不需要cookie,则不需要指定,使用下面的配置 #profile = webdriver.FirefoxProfile() #禁止加载样式表 profile.set_preference("permissions.default.stylesheet",2) #禁止加载图片 profile.set_preference("permissions.default.image",2) #禁止加载JAVASCRIPT profile.set_preference("javascript.enabled",False) #设置代理 profile.set_preference('network.proxy.type', 1) profile.set_preference('network.proxy.http', 'xx.xx.xx.xx') profile.set_preference('network.proxy.http_port', xxxx) profile.set_preference('network.proxy.ssl', 'xx.xx.xx.xx') profile.set_preference('network.proxy.ssl_port', xxxx) profile.update_preferences() #数据库配置 db_host="xx.xx.xx.xx" db_user="root" db_pass="xxxx" db_port=3306 db_name="weixin_data" #指定Firefox的驱动 driver = webdriver.Firefox(firefox_profile=profile,executable_path="geckodriver") #搜索的关键字 key_search_list=['学校'] index_url='https://weixin.sogou.com/weixin?query=' keys_search_string="" for index in range(0,len(key_search_list)): if index==len(key_search_list)-1: keys_search_string+="'"+key_search_list[index]+"'" else: keys_search_string+="'"+key_search_list[index]+"'," class AnyEc: """ Use with WebDriverWait to combine expected_conditions in an OR. """ def __init__(self, *args): self.ecs = args def __call__(self, driver): for fn in self.ecs: try: if fn(driver): return True except: pass def execute_query_sql(sql): #循环读取数据库状态是0的关键字100个 db= pymysql.connect(host=db_host,port=db_port,user=db_user, passwd=db_pass, db=db_name) # 使用 cursor() 方法创建一个游标对象 cursor cursor = db.cursor() #执行sql cursor.execute(sql) results=cursor.fetchall() # 关闭数据库连接 db.close() return results def execute_update_sql(sql): #循环读取数据库状态是0的关键字100个 db= pymysql.connect(host=db_host,port=db_port,user=db_user, passwd=db_pass, db=db_name) # 使用 cursor() 方法创建一个游标对象 cursor cursor = db.cursor() # 执行sql语句 cursor.execute(sql) # 提交到数据库执行 db.commit() #执行sql # 关闭数据库连接 db.close() #爬取网站内容的函数 def parseWeb(driver,key_name,page_N): print("开始提取关键字"+key_name+",第"+str(page_N)+"页的数据:") for page in driver.find_elements_by_xpath('//ul[@class="news-list2"]/li'): weixin_name=page.find_element_by_xpath('./div[@class="gzh-box2"]/div[@class="txt-box"]/p[@class="tit"]/a').text img_url=page.find_element_by_xpath('./div[@class="gzh-box2"]/div[@class="img-box"]/a/img').get_attribute("src") weixin_account=page.find_element_by_xpath('./div[@class="gzh-box2"]/div[@class="txt-box"]/p[@class="info"]/label').text weixin_auth_info="" try: page.find_element_by_xpath('./dl[2]/dt[contains(text(),微信认证)]') dl_info=page.find_element_by_xpath('./dl[2]/dt').text if '微信认证' in dl_info: weixin_auth_info=page.find_element_by_xpath('.//dl[2]/dd').text except NoSuchElementException: weixin_auth_info="" print("微信认证:"+weixin_auth_info) try: page.find_element_by_xpath('./div[@class="gzh-box2"]/div[@class="txt-box"]/p[@class="tit"]/i') is_auth=1 except NoSuchElementException: is_auth=0 try: describe=page.find_element_by_xpath('.//dl[1]/dd').text except NoSuchElementException: describe="" #把数据插入酷内 insert_sql='insert into weixin_data(key_id,weixin_name,weixin_account,weixin_auth_info,is_auth,img_url,`describe`,weixin_type,other) values('+str(key_id)+',"'+weixin_name+'","'+weixin_account+'","'+weixin_auth_info+'",'+str(is_auth)+',"'+img_url+'","'+describe+'","'+'培训机构'+'",'+'NULL'+'); ' #print(insert_sql) try: print("准备插入数据:"+weixin_name) execute_update_sql(insert_sql) except: print("插入数据异常,可能是重复数据") #更新当前页数 update_sql='update `keys` set page_num='+str(page_N)+' where keyword="'+key_name+'";' print(update_sql) try: execute_update_sql(update_sql) except: print("更新爬取页数错误") return False return True #判断页面是否加载完成 def pageIsLoadFinished(driver): try: WebDriverWait(driver, 10).until( AnyEc( EC.presence_of_element_located( (By.XPATH, u'//div[@class="gzh-box2"]/div[@class="img-box"]/a/img')), EC.presence_of_element_located( (By.XPATH, u'//p[@class="ip-time-p"]')), EC.presence_of_element_located( (By.XPATH, u'//div[@id="noresult_part1_container"]')) )) return True except TimeoutException: return False #页面是否正常 def pageIsNomal(driver): try: driver.find_element_by_xpath('//p[@class="ip-time-p"]') print("IP访问频繁,准备重启浏览器") time.sleep(3) return False except NoSuchElementException: return True #页面是否404 def pageIsNotFound(driver,key_name): try: driver.find_element_by_xpath('//div[@id="noresult_part1_container"]') print("关键字"+key_name+"没有找到,搜索下一个关键字") return True except NoSuchElementException: return False #跳到指定页 def jumpNumPage(driver,page_N): #判断是否是当前页 try: current_page=driver.find_element_by_xpath('//div[@id="pagebar_container"]/span').text if int(page_N) == int(current_page): print("已经在当前页,无需跳转") return True except: print("没有当前页"+str(page_N)) return False try: driver.find_element_by_xpath('//div[@id="pagebar_container"]/a[@id="sogou_page_'+str(page_N)+'"]').click() except NoSuchElementException: print("没有第"+str(page_N)+"页面") return False return True #跳到下一页 def jumpNextPage(driver): try: driver.find_element_by_xpath('//div[@id="pagebar_container"]/a[@id="sogou_next"]').click() except NoSuchElementException: print("没有下一页") return False return True #页面是否准备好 def PageIsReady(driver,key_name,page_N): #判断页面已经加载完成,并且不存在ip频繁访问页面 if pageIsLoadFinished(driver) and pageIsNomal(driver): #判断页面不存在指定的标签页 if not jumpNumPage(driver,page_N): #判断页面是否404 if pageIsNotFound(driver,key_name): #更新数据库关键字字段 update_status_sql='update `keys` set status=1 where keyword="'+key_name+'";' try: execute_update_sql(update_status_sql) except: print("更新关键字"+key_name+"的状态失败!!!") return True else: return False return True #循环抓取 while True: get_keys="SELECT id,keyword FROM keys where status=0 and is_drop=0 and type in ("+keys_search_string+") limit 100;" print(get_keys) print("获取关键字中...") try: results=execute_query_sql(get_keys) except: print("数据库查询关键字失败,停止爬虫") break print("关键字查找完成") #生成url id_keys=[ re for re in results ] for id_key in id_keys: key_id=id_key[0] key_name=id_key[1] url=index_url+key_name print("开始爬取:"+url) try: driver.get(url) except TimeoutException: continue #获取爬取key的页数 get_page_sql='select page_num from `keys` where id='+str(key_id)+';' page_N=execute_query_sql(get_page_sql)[0][0]+1 if PageIsReady(driver,key_name,page_N): if not parseWeb(driver,key_name,page_N): continue else: time.sleep(1) driver.close() driver = webdriver.Firefox(firefox_profile=profile,executable_path="geckodriver") continue #跳转到当前页爬取 #爬取完当前页更新key关键字 #判断是否有下一页继续爬取,如果只爬取一页,则注释下面的代码 ##########是否爬取搜索关键字的所有页面--start isOk=True while jumpNextPage(driver): get_page_sql='select page_num from `keys` where id='+str(key_id)+';' page_N=execute_query_sql(get_page_sql)[0][0]+1 if PageIsReady(driver,key_name,page_N): if not parseWeb(driver,key_name,page_N): isOk=False break else: isOk=False break if not isOk: time.sleep(1) driver.close() driver = webdriver.Firefox(firefox_profile=profile,executable_path="geckodriver") continue ##########是否爬取搜索关键字的所有页面--end #更新关键字状态 update_status_sql='update `keys` set status=1 where keyword="'+key_name+'";' try: execute_update_sql(update_status_sql) except: print("更新数据库关键字"+key_name+"的状态发生错误") ```
阅读 57 评论 0 收藏 0
阅读 57
评论 0
收藏 0

   2019-01-07 18:07:31    2019-01-07 18:07:31   

selenium 爬虫
1.安装firefox驱动 Chrome: https://sites.google.com/a/chromium.org/chromedriver/downloads Firefox: https://github.com/mozilla/geckodriver/releases 2.安装python3.6 3.安装selenium `pip install selenium` 4.把firefox驱动所在路径加入环境变量或者把驱动文件放到python目录 5.编辑爬虫脚本(根据所爬取的网站做相应修改) `查找元素函数的使用方法:`https://selenium-python.readthedocs.io/locating-elements.html `vim scrapy.py` ```python #coding=utf-8 from selenium import webdriver import time from selenium.common.exceptions import NoSuchElementException #设置cookie fp=webdriver.FirefoxProfile(r'C:\Users\Administrator\AppData\Roaming\Mozilla\Firefox\Profiles\yn80ouvt.default') #指定Firefox的驱动 driver = webdriver.Firefox(fp,executable_path="geckodriver") #指定页面 driver.get('https://admin.xxxx.com/chosen/channel_side#/wareHouse/index') #分页爬取 for i in range(1,1000): #js执行scroll滚动到页面的顶部 js="var q=document.getElementsByClassName('base_right editbase_right')[0].scrollTop=0" driver.execute_script(js) time.sleep(5) #查找页面的tr元素 for i in driver.find_elements_by_xpath('//tr[@class="ET4 EC7"]'): #获取图片url img_url=i.find_element_by_xpath('./td[@class="lessonInfo"]/div[@class="lessonImg"]/img').get_attribute("src") #获取标题名 title=i.find_element_by_xpath('./td[@class="lessonInfo"]/div[@class="lessonContent"]/p[1]').text #获取类型 type=i.find_element_by_xpath('./td[2]').text #获取状态 status=i.find_element_by_xpath('./td[@class="AuditState"]/descendant::span[1]').text.strip() if status == '已上架': #点击元素加载二维码图片 i.find_element_by_xpath('./td[@class="operateItems"]/div/div[@class="ET4 EC1"]/a[4]').click() qcode=driver.find_element_by_xpath('//div[@class="share_box"]/a[@class="download_link EC1"]').get_attribute("href") #保存当前窗口的句柄 window_before = driver.window_handles[0] #点击触发打开新窗口 i.find_element_by_xpath('./td[@class="lessonInfo"]/div[@class="lessonContent"]/p[1]').click() #保存新窗口句柄 window_after = driver.window_handles[1] #跳转到新窗口 driver.switch_to_window(window_after) #等待3s加载页面 time.sleep(3) try: #获取页面内容 learn_count=driver.find_element_by_xpath('//div[@class="price_box_after" or @class="price_box_before"]/p[3]/span').text except NoSuchElementException: print("页面人数查询失败") #关闭新打开的窗口 driver.close() #跳转到之前保存的窗口 driver.switch_to_window(window_before) #保存数据 file=open('F:\\demo\\courses.txt','a',encoding='utf-8') file.write(title+","+learn_count+","+img_url+","+qcode+","+type+","+status+"\n") file.close() try: driver.find_element_by_xpath('//span[@class="page-index active"]/following-sibling::span/following-sibling::button[@class="btn-pager"]').click() except NoSuchElementException: driver.close() break ``` 6.首先通过firefox登陆对应的网站,目的是为了生成登陆的cookie 7.执行爬虫脚本 `python scrapy.py`
阅读 44 评论 0 收藏 0
阅读 44
评论 0
收藏 0

   2018-11-11 23:24:22    2018-11-11 23:24:22   

c语言 C Windows API
阅读 317 评论 0 收藏 0
阅读 317
评论 0
收藏 0

   2018-11-01 11:33:22    2018-11-01 11:33:22   

python django 邮件
阅读 50 评论 0 收藏 0
阅读 50
评论 0
收藏 0

   2018-10-29 23:59:04    2018-10-29 23:59:04   

贪吃蛇 easyx c语言
安装easyx,文件保存为cpp后缀 ```C #include <graphics.h> #include <conio.h> #include <time.h> #include <stdio.h> #include <math.h> #include <windows.h> #define KEYDOWN(vk_code) ((GetAsyncKeyState(vk_code) & 0x8000) ? 1 : 0) void init(void);//初始化 void gamebegin(void);//游戏开始 void gameplay(void);//开始游戏 void gameend(void);//游戏借宿 void drawsnake(void);//绘制小蛇 void drawfood(void);//绘制食物 //定义坐标struct struct Point{ int x,y; }; struct Point aps[3871]; //定义界面所有点坐标数组 struct Point snake[3871];//定义蛇身的各个位置的坐标数组 struct Point food[100];//定义食物的各个位置的坐标数组 int apsindex=0; //定义界面所有点坐标数组索引 int snakeindex=0; //定义蛇身的各个位置的坐标数组索引 int foodindex=0; //定义食物的各个位置的坐标数组索引 struct Point snakehead={300,250};//定义蛇头的位置 int snakedirect; //1为上,2为下,3为左,4为右 int snakespeed=1; //设置蛇的速度 int snakelength=3; //初始化蛇的长度 int gamestop=0; //游戏是否结束 int stoptype; //游戏结束原因,值为1,则是撞墙,值为2则为碰到身子 int gamescore=0; //游戏分数 int gamelevel=1; //游戏等级 void main(void){ init(); gamebegin(); gameplay(); gameend(); } //初始化函数 void init(void){ initgraph(1100,600); } //绘制分数 void drawscore(int score){ char s[50]; sprintf(s,"分数:%4d",score); settextstyle(18, 0, _T("黑体")); settextcolor(RGB(255,255,0)); outtextxy(950, 60, s); } //绘制等级 void drawlevel(int level){ char s[50]; sprintf(s,"等级:%4d",level); settextstyle(18, 0, _T("黑体")); settextcolor(RGB(255,255,0)); outtextxy(950, 80, s); } //绘制蛇 void drawsnake(){ setlinecolor(RGB(255,255,255)); setlinestyle(PS_SOLID, 1); for(int i=0;i<snakeindex;i++){ if(i==0){ setfillcolor(RGB(255,0,0)); }else{ setfillcolor(RGB(128,128,0)); } fillrectangle(snake[i].x-5,snake[i].y-5,snake[i].x+5,snake[i].y+5); } } //擦除蛇尾 void earsesnaketail(){ //setlinestyle(PS_SOLID, 0); setlinecolor(RGB(0,128,0)); setfillcolor(RGB(0,128,0)); fillrectangle(snake[snakeindex-1].x-5,snake[snakeindex-1].y-5,snake[snakeindex-1].x+5,snake[snakeindex-1].y+5); } //绘制食物 void drawfood(Point p1){ setlinestyle(PS_SOLID, 1); setfillcolor(RGB(255,255,0)); fillrectangle(p1.x-5,p1.y-5,p1.x+5,p1.y+5); } //生成食物 void genfood(int index=-1){ //生产随机数0~3871 int r=rand()%3872; //判断随机点aps[r]是否和蛇身所在位置是否重合 for(int j=0;j<snakeindex;j++){ if(aps[r].x==snake[j].x&&aps[r].y==snake[j].y){ //如果发生重合,j减一,重新循环一次 j--; //重新生成随机数 r=rand()%3872; } } if(index!=-1){ food[index]=aps[r]; }else{ food[foodindex]=aps[r]; foodindex++; } } //游戏开始 void gamebegin(){ //1 绘制游戏界面 //1.1 设置游戏框 setfillcolor(RGB(0,128,0)); setlinestyle(PS_SOLID, 10); fillrectangle(100,50,900,550); //绘制分数 drawscore(gamescore); //绘制等级 drawlevel(gamelevel); //初始化所有点的坐标 for(int i=110;i<=890;i+=10){ for(int j=60;j<=540;j+=10){ aps[apsindex].x=i; aps[apsindex].y=j; apsindex++; printf("%d,%d ",i,j); } } //初始化小蛇身体数组 for(i=0;i<snakelength;i++){ snake[i].x=snakehead.x-10*i; snake[i].y=snakehead.y; snakeindex++; } //初始化食物的数组 //初始化随机数种子 srand((unsigned)time(NULL)); //绘制小蛇 drawsnake(); //绘制食物 //随机5个食物 for(i=0;i<10;i++){ genfood(); } //画出食物 for(i=0;i<foodindex;i++){ drawfood(food[i]); } //drawfood(); } //方向函数 void toward(struct Point temp){ //判断是否撞墙 if(temp.x<110||temp.x>890||temp.y<60||temp.y>540){ gamestop=1; stoptype=1; } //判断是否蛇头碰到身子 for(int i=0;i<snakeindex;i++){ if(temp.x==snake[i].x&&temp.y==snake[i].y){ gamestop=1; stoptype=2; } } //定义蛇是否迟到食物变量iseat bool iseat=0; //判断是否吃到食物 for(i=0;i<foodindex;i++){ if(food[i].x==temp.x&&food[i].y==temp.y){ iseat=1; //计算游戏分数 gamescore=gamescore+(snakeindex-snakelength+1)*2; if(gamescore>400){ gamelevel=6; }else if(gamescore>300){ gamelevel=5; }else if(gamescore>200){ gamelevel=4; }else if(gamescore>100){ gamelevel=3; }else if(gamescore>10){ gamelevel=2; } genfood(i); drawfood(food[i]); } } Sleep(10); if(iseat){ for(int i=snakeindex;i>0;i--){ snake[i]=snake[i-1]; } snakeindex++; snake[0]=temp; }else{ earsesnaketail(); for(int i=snakeindex;i>0;i--){ snake[i]=snake[i-1]; } snake[0]=temp; } drawsnake(); } //向上 void towardsUp(){ struct Point newhead={snake[0].x,snake[0].y-10}; toward(newhead); } //向下 void towardsDown(){ struct Point newhead={snake[0].x,snake[0].y+10}; toward(newhead); } //向左 void towardsLeft(){ struct Point newhead={snake[0].x-10,snake[0].y}; toward(newhead); } //向右 void towardsRight(){ struct Point newhead={snake[0].x+10,snake[0].y}; toward(newhead); } void gameplay(){ //判断方向键,绘制蛇头的方块的方向,同时在蛇尾去掉方块 //判断方块是否和墙面接触,如果是则游戏结束,否则,继续往最后一次按键方向绘制方块 int dr=0; while(gamestop!=1){ //判断方向键是否为上 if(KEYDOWN(VK_UP)&&dr!=2){ dr=1; } //判断方向键是否为下 if(KEYDOWN(VK_DOWN)&&dr!=1){ dr=2; } //判断方向键是否为左 if(KEYDOWN(VK_LEFT)&&dr!=4){ dr=3; } //判断方向键是否为右 if(KEYDOWN(VK_RIGHT)&&dr!=3){ dr=4; } switch(dr){ case 1: towardsUp(); break; case 2: towardsDown(); break; case 3: towardsLeft(); break; case 4: towardsRight(); break; } //绘制分数 drawscore(gamescore); //绘制等级 drawlevel(gamelevel); switch(gamelevel){ case 1: Sleep(80); break; case 2: Sleep(70); break; case 3: Sleep(60); break; case 4: Sleep(50); break; case 5: Sleep(40); break; case 6: Sleep(20); break; } } } void gameend(){ //绘制结束框 setfillcolor(RGB(255,0,0)); setlinestyle(PS_SOLID, 10); fillrectangle(300,200,700,300); char s[200]; if(stoptype==1){ sprintf(s,"你撞墙了,游戏结束!"); }else if(stoptype==2){ sprintf(s,"你咬到自己了,游戏结束!"); }else{ sprintf(s,"游戏结束!"); } settextstyle(18, 0, _T("黑体")); settextcolor(RGB(255,255,0)); RECT r = {300,200,700,300}; setbkmode(TRANSPARENT); drawtext(s, &r, DT_CENTER | DT_VCENTER | DT_SINGLELINE); while(1){ Sleep(10000); }; getch(); closegraph(); } ``` ![](https://image.ynotes.cn/18-10-30/5567999.jpg) `下载链接`[贪吃蛇下载](https://image.ynotes.cn/%E8%B4%AA%E5%90%83%E8%9B%87.exe)
阅读 989 评论 0 收藏 0
阅读 989
评论 0
收藏 0

第 1 页 / 共 4 页
 
第 1 页 / 共 4 页