Python-scrapy爬虫

Scrapy API 文档 | 开源项目(GitHub)

1. demo

下面是一个最小的爬虫示例:抓取 quotes.toscrape.com 上带有 humor 标签的页面,并提取每条名言的作者和正文。
import scrapy
class QuotesSpider(scrapy.Spider):
    """Spider that scrapes quotes tagged "humor" from quotes.toscrape.com.

    Every ``div.quote`` element on a fetched page is emitted as a dict with
    ``author`` and ``text`` keys.
    """

    # Spider identifier (Scrapy uses it to select the spider to run).
    name = 'quotes'
    # URLs the crawl starts from; their responses are handled by ``parse``.
    start_urls = [
        'http://quotes.toscrape.com/tag/humor/',
    ]

    def parse(self, response):
        """Yield one item per quote block found in *response*.

        Args:
            response: The downloaded page. It must expose ``css()``, and the
                selectors it returns must support ``xpath()``, ``css()`` and
                ``get()``.

        Yields:
            dict: ``{'author': ..., 'text': ...}`` holding the first match
            of each selector within the quote block.
        """
        for quote in response.css('div.quote'):
            yield {
                # Author name is the text of <small> nested in a <span>.
                'author': quote.xpath('span/small/text()').get(),
                # Quote body is the text node of <span class="text">.
                'text': quote.css('span.text::text').get(),
            }