Mainly two files are involved: pipelines.py and settings.py.

pipelines.py
from twisted.enterprise import adbapi
import pymysql  # the driver that adbapi loads by the name given in the settings


class Www0577HomePipeline(object):
    def __init__(self, mysql_config):
        # adbapi.ConnectionPool runs the blocking pymysql calls in a thread pool,
        # so inserts do not block the Twisted reactor that Scrapy runs on.
        self.dbpool = adbapi.ConnectionPool(
            mysql_config['DRIVER'],
            host=mysql_config['HOST'],
            port=mysql_config['PORT'],
            user=mysql_config['USER'],
            password=mysql_config['PASSWORD'],
            db=mysql_config['DATABASE'],
            charset='utf8'
        )

    @classmethod
    def from_crawler(cls, crawler):
        # Once from_crawler is defined, Scrapy calls it to create the pipeline object,
        # which lets us read MYSQL_CONFIG from settings.py before __init__ runs.
        mysql_config = crawler.settings['MYSQL_CONFIG']
        return cls(mysql_config)

    def process_item(self, item, spider):
        # runInteraction hands a cursor to insert_item on a worker thread and
        # returns a Deferred; attach an errback so failed inserts get logged.
        result = self.dbpool.runInteraction(self.insert_item, item)
        result.addErrback(self.insert_error)
        return item

    def insert_item(self, cursor, item):
        # The SQL insert: id is auto-generated (null), the rest comes from the item fields.
        sql = ("insert into data(id,name,address,sell_address,price,type,developer,"
               "telephone,years,time_dev,time_ok,link) "
               "values(null,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
        args = (item['name'], item['address'], item['sell_address'], item['price'],
                item['type'], item['developer'], item['telephone'], item['years'],
                item['time_dev'], item['time_ok'], item['link'])
        cursor.execute(sql, args)

    def insert_error(self, failure):
        print("=" * 30)
        print(failure)
        print("=" * 30)

    def close_spider(self, spider):
        # Close the connection pool when the spider finishes.
        self.dbpool.close()
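For this pipeline to work, the item passed in must carry the same field names used in insert_item. A minimal sketch of what items.py could look like is below; only the field names are taken from the code above, while the class name HomeItem is an assumption:

import scrapy


class HomeItem(scrapy.Item):
    # Field names mirror the columns inserted by Www0577HomePipeline.insert_item.
    name = scrapy.Field()
    address = scrapy.Field()
    sell_address = scrapy.Field()
    price = scrapy.Field()
    type = scrapy.Field()
    developer = scrapy.Field()
    telephone = scrapy.Field()
    years = scrapy.Field()
    time_dev = scrapy.Field()
    time_ok = scrapy.Field()
    link = scrapy.Field()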
settings.py needs the database connection info added:
MYSQL_CONFIG = {
    'DRIVER': "pymysql",
    'HOST': "127.0.0.1",
    'PORT': 3306,        # the port must be an integer, not a string
    'USER': 'root',
    'PASSWORD': 'root',
    'DATABASE': "XXXX"
}
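The pipeline also has to be enabled in settings.py or Scrapy will never call it. A minimal sketch, assuming the project package is named www0577_home (adjust the dotted path to your actual project):

ITEM_PIPELINES = {
    # 'www0577_home' is an assumed package name; the value 300 is the pipeline priority.
    'www0577_home.pipelines.Www0577HomePipeline': 300,
}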