From ec7acf93088a9bb8c8457fd6cbdf45b202a0bd9a Mon Sep 17 00:00:00 2001 From: Jordan ERNST Date: Sun, 16 Aug 2020 18:55:03 +0200 Subject: [PATCH] PricePerSqm Pipeline --- ImmoScrap/pipelines.py | 11 ++++------- ImmoScrap/spiders/PAP.py | 4 ++-- ImmoScrap/spiders/Seloger.py | 4 ++-- ImmoScrap/spiders/leboncoin.py | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/ImmoScrap/pipelines.py b/ImmoScrap/pipelines.py index de123b6..b6758dc 100644 --- a/ImmoScrap/pipelines.py +++ b/ImmoScrap/pipelines.py @@ -10,11 +10,8 @@ class PricePerSqmPipeline: def process_item(self, item, spider): - ''' - pricefloat = float(price.replace(',', '.').replace(' ', '').replace('€', '')) - areafloat = float(area.replace(',', '.').replace(' ', '').replace('m²', '')) - pricesqmint = round(pricefloat / areafloat) - pricepersqm = float(item['price']) / float(item['area']) - item['price/sqm'] = pricepersqm - ''' + area = item['area'] + if area is not None: + pricepersqm = item['price'] / float(item['area']) + item['price/sqm'] = round(pricepersqm) return item diff --git a/ImmoScrap/spiders/PAP.py b/ImmoScrap/spiders/PAP.py index 2f6a6b7..3638cf5 100644 --- a/ImmoScrap/spiders/PAP.py +++ b/ImmoScrap/spiders/PAP.py @@ -37,10 +37,10 @@ class PAPSpider(scrapy.Spider): yield { 'url': self.baseurl + url, 'type': 'Unknown', - 'price': ad.css(self.price_css_sel).get().replace('.', '').replace('\xa0€', ''), + 'price': int(ad.css(self.price_css_sel).get().replace('.', '').replace('\xa0€', '')), 'rooms': rooms, 'bedrooms': bedrooms, - 'area': infos.xpath('./li[contains(small, "m")]/text()').get() + 'area': infos.xpath('./li[contains(small, "m")]/text()').get().replace(' ', '') } next_page = response.xpath(self.next_page_xpath).get() diff --git a/ImmoScrap/spiders/Seloger.py b/ImmoScrap/spiders/Seloger.py index dbfcab5..644d93a 100644 --- a/ImmoScrap/spiders/Seloger.py +++ b/ImmoScrap/spiders/Seloger.py @@ -57,10 +57,10 @@ class SelogerSpider(scrapy.Spider): yield { 'url': ad.css(self.url_css_sel).get().split('?')[0], 'type': ad.css(self.type_css_sel).get().replace('/Villa', ''), - 'price': ad.css(self.price_css_sel).get().replace('\xa0', '').replace(' €', ''), + 'price': int(ad.css(self.price_css_sel).get().replace('\xa0', '').replace(' €', '')), 'rooms': rooms, 'bedrooms': bedrooms, - 'area': area + 'area': area.replace(',', '.') } active_page = response.url active_page_nb = int(active_page.split('LISTING-LISTpg=')[1]) diff --git a/ImmoScrap/spiders/leboncoin.py b/ImmoScrap/spiders/leboncoin.py index bcfd959..a21e809 100644 --- a/ImmoScrap/spiders/leboncoin.py +++ b/ImmoScrap/spiders/leboncoin.py @@ -87,7 +87,7 @@ class LeboncoinSpider(scrapy.Spider): yield { 'url': ad['url'], 'type': type, - 'price': ad['price'][0], + 'price': int(ad['price'][0]), 'rooms': rooms, 'bedrooms': 'Unknown', 'area': area