Last changes
This commit is contained in:
parent
ec7acf9308
commit
6ce018418a
|
@ -14,8 +14,8 @@ NEWSPIDER_MODULE = 'ImmoScrap.spiders'
|
|||
LOG_LEVEL = 'WARNING'
|
||||
|
||||
FEEDS = {
|
||||
'export.json': {
|
||||
'format': 'json'
|
||||
'export.jsonl': {
|
||||
'format': 'jsonlines'
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -32,7 +32,7 @@ ROBOTSTXT_OBEY = False
|
|||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
DOWNLOAD_DELAY = 2 # Between 1 and 3 seconds: RANDOMIZE_DOWNLOAD_DELAY enabled by default
|
||||
DOWNLOAD_DELAY = 5 # Between 2.5 and 7.5 seconds: RANDOMIZE_DOWNLOAD_DELAY enabled by default
|
||||
# The download delay setting will honor only one of:
|
||||
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
# CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
|
|
@ -48,7 +48,7 @@ class SelogerSpider(scrapy.Spider):
|
|||
rooms = infos.xpath('./li[contains(., "p")]/text()').get()
|
||||
bedrooms = infos.xpath('./li[contains(., "ch")]/text()').get()
|
||||
if area is not None:
|
||||
area = area.replace(' m²', '')
|
||||
area = area.replace(' m²', '').replace(',', '.')
|
||||
if rooms is not None:
|
||||
rooms = rooms.replace(' p', '')
|
||||
if bedrooms is not None:
|
||||
|
@ -60,7 +60,7 @@ class SelogerSpider(scrapy.Spider):
|
|||
'price': int(ad.css(self.price_css_sel).get().replace('\xa0', '').replace(' €', '')),
|
||||
'rooms': rooms,
|
||||
'bedrooms': bedrooms,
|
||||
'area': area.replace(',', '.')
|
||||
'area': area
|
||||
}
|
||||
active_page = response.url
|
||||
active_page_nb = int(active_page.split('LISTING-LISTpg=')[1])
|
||||
|
|
Loading…
Reference in New Issue