WIP: pipelines, code cleanup, export now to JSON

This commit is contained in:
Jordan ERNST 2020-08-16 18:10:59 +02:00
parent 34e577a794
commit 828035e166
3 changed files with 14 additions and 13 deletions

View File

@@ -5,9 +5,16 @@
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
# from itemadapter import ItemAdapter
class ImmoscrapPipeline:
class PricePerSqmPipeline:
    """Item pipeline meant to add a price-per-square-metre field to items.

    The computation is still work in progress, so every item currently
    passes through unchanged.
    """

    def process_item(self, item, spider):
        """Return ``item`` without modifying it.

        Args:
            item: The scraped item handed to this pipeline.
            spider: The spider that produced ``item`` (currently unused).

        Returns:
            The very same ``item`` object, untouched.
        """
        # TODO: enable the price-per-sqm computation. The draft below is kept
        # as a comment (not as a string literal) so it does not end up as the
        # method's docstring.
        # NOTE(review): ``price`` and ``area`` are not defined in this scope,
        # and the final ``.replace('', '')`` on each line is a no-op --
        # presumably a currency/unit symbol was lost; confirm before enabling.
        #
        #   pricefloat = float(price.replace(',', '.').replace(' ', '').replace('', ''))
        #   areafloat = float(area.replace(',', '.').replace(' ', '').replace('', ''))
        #   pricesqmint = round(pricefloat / areafloat)
        #   pricepersqm = float(item['price']) / float(item['area'])
        #   item['price/sqm'] = pricepersqm
        return item

View File

@@ -14,8 +14,8 @@ NEWSPIDER_MODULE = 'ImmoScrap.spiders'
LOG_LEVEL = 'WARNING'
FEEDS = {
'export.csv': {
'format': 'csv'
'export.json': {
'format': 'json'
}
}
@@ -80,9 +80,9 @@ EXTENSIONS = {
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# ITEM_PIPELINES = {
# 'ImmoScrap.pipelines.ImmoscrapPipeline': 300,
# }
# Enabled item pipelines, each mapped to its integer order value.
ITEM_PIPELINES = {'ImmoScrap.pipelines.PricePerSqmPipeline': 300}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html

View File

@@ -62,12 +62,6 @@ class SelogerSpider(scrapy.Spider):
'bedrooms': bedrooms,
'area': area
}
"""
pricefloat = float(price.replace(',', '.').replace(' ', '').replace('', ''))
areafloat = float(area.replace(',', '.').replace(' ', '').replace('', ''))
pricesqmint = round(pricefloat / areafloat)
print(url, type, price, rooms, bedrooms, area, pricesqmint)
"""
active_page = response.url
active_page_nb = int(active_page.split('LISTING-LISTpg=')[1])