Last changes

This commit is contained in:
Jordan ERNST 2021-12-12 21:58:25 +01:00
parent ec7acf9308
commit 6ce018418a
2 changed files with 5 additions and 5 deletions

View File

@ -14,8 +14,8 @@ NEWSPIDER_MODULE = 'ImmoScrap.spiders'
LOG_LEVEL = 'WARNING'
FEEDS = {
'export.json': {
'format': 'json'
'export.jsonl': {
'format': 'jsonlines'
}
}
@ -32,7 +32,7 @@ ROBOTSTXT_OBEY = False
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 2 # Between 1 and 3 seconds: RANDOMIZE_DOWNLOAD_DELAY is enabled by default
DOWNLOAD_DELAY = 5 # Between 2.5 and 7.5 seconds: RANDOMIZE_DOWNLOAD_DELAY is enabled by default
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

View File

@ -48,7 +48,7 @@ class SelogerSpider(scrapy.Spider):
rooms = infos.xpath('./li[contains(., "p")]/text()').get()
bedrooms = infos.xpath('./li[contains(., "ch")]/text()').get()
if area is not None:
area = area.replace('', '')
area = area.replace('', '').replace(',', '.')
if rooms is not None:
rooms = rooms.replace(' p', '')
if bedrooms is not None:
@ -60,7 +60,7 @@ class SelogerSpider(scrapy.Spider):
'price': int(ad.css(self.price_css_sel).get().replace('\xa0', '').replace('', '')),
'rooms': rooms,
'bedrooms': bedrooms,
'area': area.replace(',', '.')
'area': area
}
active_page = response.url
active_page_nb = int(active_page.split('LISTING-LISTpg=')[1])