Storing Scrapy Crawler Data in a MySQL Database

Only two files need changes: pipelines.py and settings.py.

pipelines.py

from twisted.enterprise import adbapi
import pymysql  # the driver named in MYSQL_CONFIG['DRIVER']; adbapi imports it by name


class Www0577HomePipeline(object):
    def __init__(self, mysql_config):
        # adbapi wraps the DB-API driver in a Twisted thread pool,
        # so database writes don't block the crawl
        self.dbpool = adbapi.ConnectionPool(
            mysql_config['DRIVER'],
            host=mysql_config['HOST'],
            port=mysql_config['PORT'],
            user=mysql_config['USER'],
            password=mysql_config['PASSWORD'],
            db=mysql_config['DATABASE'],
            charset='utf8'
        )

    @classmethod
    def from_crawler(cls, crawler):
        # When from_crawler is defined, Scrapy calls it to create the
        # pipeline instance, which gives us access to the crawler settings
        mysql_config = crawler.settings['MYSQL_CONFIG']
        return cls(mysql_config)

    def process_item(self, item, spider):
        # runInteraction executes insert_item in a worker thread and
        # returns a Deferred; attach an errback so failures get logged
        result = self.dbpool.runInteraction(self.insert_item, item)
        result.addErrback(self.insert_error)
        return item

    def insert_item(self, cursor, item):
        # the SQL statement; id is null so MySQL assigns the auto-increment key
        sql = "insert into data(id,name,address,sell_address,price,type,developer,telephone,years,time_dev,time_ok,link) values(null,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        args = (item['name'], item['address'], item['sell_address'], item['price'], item['type'], item['developer'], item['telephone'], item['years'], item['time_dev'], item['time_ok'], item['link'])
        cursor.execute(sql, args)

    def insert_error(self, failure):
        print("=" * 30)
        print(failure)
        print("=" * 30)

    def close_spider(self, spider):
        self.dbpool.close()
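
The pipeline reads its fields straight from the item, so items.py must define matching Field()s. A minimal sketch (the class name HouseItem is a placeholder; the original post doesn't show the item class):

import scrapy

class HouseItem(scrapy.Item):
    # one Field per column inserted by insert_item
    name = scrapy.Field()
    address = scrapy.Field()
    sell_address = scrapy.Field()
    price = scrapy.Field()
    type = scrapy.Field()
    developer = scrapy.Field()
    telephone = scrapy.Field()
    years = scrapy.Field()
    time_dev = scrapy.Field()
    time_ok = scrapy.Field()
    link = scrapy.Field()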

settings.py needs the database connection details added:

MYSQL_CONFIG = {
    'DRIVER': "pymysql",
    'HOST': "127.0.0.1",
    'PORT': 3306,  # PORT must be an integer, not a string
    'USER': 'root',
    'PASSWORD': 'root',
    'DATABASE': "XXXX"
}
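
The pipeline also has to be enabled in settings.py, or process_item will never run. The module path below is a placeholder; use your own project's package name:

ITEM_PIPELINES = {
    # 300 is an arbitrary priority; lower numbers run first
    'your_project.pipelines.Www0577HomePipeline': 300,
}

Finally, the insert assumes a data table already exists in the configured database. A rough schema that matches the INSERT statement above (all column types here are guesses; size them for your actual data):

CREATE TABLE data (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255),
    address VARCHAR(255),
    sell_address VARCHAR(255),
    price VARCHAR(64),
    type VARCHAR(64),
    developer VARCHAR(255),
    telephone VARCHAR(64),
    years VARCHAR(64),
    time_dev VARCHAR(64),
    time_ok VARCHAR(64),
    link VARCHAR(512)
) DEFAULT CHARSET=utf8;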
