From 1e18327a9e8fa12713ec97b45a7c64353dd2ed1d Mon Sep 17 00:00:00 2001
From: Jordan ERNST
Date: Tue, 11 Aug 2020 12:18:24 +0200
Subject: [PATCH] Simplify start_requests:
 https://docs.scrapy.org/en/latest/intro/tutorial.html#a-shortcut-to-the-start-requests-method

---
 ImmoScrap/spiders/PAP.py     | 11 ++++-------
 ImmoScrap/spiders/Seloger.py | 11 ++++-------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/ImmoScrap/spiders/PAP.py b/ImmoScrap/spiders/PAP.py
index f0fadbd..2f6a6b7 100644
--- a/ImmoScrap/spiders/PAP.py
+++ b/ImmoScrap/spiders/PAP.py
@@ -13,13 +13,10 @@ class PAPSpider(scrapy.Spider):
 
     next_page_xpath = '//a[@id="pagination-next"]/@href'
 
-    def start_requests(self):
-        urls = [
-            'https://www.pap.fr/annonce/vente-appartement-immeuble-maison-vienne-38200-g21767-jusqu-a-300000-euros',
-            'https://www.pap.fr/annonce/vente-appartement-immeuble-maison-saint-etienne-42-g43641-jusqu-a-300000-euros'
-        ]
-        for url in urls:
-            yield scrapy.Request(url=url, callback=self.parse)
+    start_urls = [
+        'https://www.pap.fr/annonce/vente-appartement-immeuble-maison-vienne-38200-g21767-jusqu-a-300000-euros',
+        'https://www.pap.fr/annonce/vente-appartement-immeuble-maison-saint-etienne-42-g43641-jusqu-a-300000-euros'
+    ]
 
     def parse(self, response):
         ads = response.css(self.ads_css_sel)
diff --git a/ImmoScrap/spiders/Seloger.py b/ImmoScrap/spiders/Seloger.py
index 0b2199b..df62330 100644
--- a/ImmoScrap/spiders/Seloger.py
+++ b/ImmoScrap/spiders/Seloger.py
@@ -35,13 +35,10 @@ class SelogerSpider(scrapy.Spider):
     infos_css_sel = ".eJYQQA"  # Contains Rooms, bedrooms, area
     pagination_xpath = '//div[has-class("ckWPHD")]//text()'
 
-    def start_requests(self):
-        urls = [
-            f'{self.baseurl}?{urlencode(self.vienne_params)}',
-            f'{self.baseurl}?{urlencode(self.stetienne_params)}'
-        ]
-        for url in urls:
-            yield scrapy.Request(url=url, headers=self.headers, callback=self.parse)
+    start_urls = [
+        f'{baseurl}?{urlencode(vienne_params)}',
+        f'{baseurl}?{urlencode(stetienne_params)}'
+    ]
 
     def parse(self, response):
         ads = response.css(self.ads_css_sel)