WIP: pipelines, code cleanup, export now to JSON
This commit is contained in:
parent
34e577a794
commit
828035e166
|
@ -5,9 +5,16 @@
|
||||||
|
|
||||||
|
|
||||||
# useful for handling different item types with a single interface
|
# useful for handling different item types with a single interface
|
||||||
from itemadapter import ItemAdapter
|
# from itemadapter import ItemAdapter
|
||||||
|
|
||||||
|
|
||||||
class ImmoscrapPipeline:
|
class PricePerSqmPipeline:
|
||||||
def process_item(self, item, spider):
|
def process_item(self, item, spider):
|
||||||
|
'''
|
||||||
|
pricefloat = float(price.replace(',', '.').replace(' ', '').replace('€', ''))
|
||||||
|
areafloat = float(area.replace(',', '.').replace(' ', '').replace('m²', ''))
|
||||||
|
pricesqmint = round(pricefloat / areafloat)
|
||||||
|
pricepersqm = float(item['price']) / float(item['area'])
|
||||||
|
item['price/sqm'] = pricepersqm
|
||||||
|
'''
|
||||||
return item
|
return item
|
||||||
|
|
|
@ -14,8 +14,8 @@ NEWSPIDER_MODULE = 'ImmoScrap.spiders'
|
||||||
LOG_LEVEL = 'WARNING'
|
LOG_LEVEL = 'WARNING'
|
||||||
|
|
||||||
FEEDS = {
|
FEEDS = {
|
||||||
'export.csv': {
|
'export.json': {
|
||||||
'format': 'csv'
|
'format': 'json'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,9 +80,9 @@ EXTENSIONS = {
|
||||||
|
|
||||||
# Configure item pipelines
|
# Configure item pipelines
|
||||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||||
# ITEM_PIPELINES = {
|
ITEM_PIPELINES = {
|
||||||
# 'ImmoScrap.pipelines.ImmoscrapPipeline': 300,
|
'ImmoScrap.pipelines.PricePerSqmPipeline': 300,
|
||||||
# }
|
}
|
||||||
|
|
||||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||||
|
|
|
@ -62,12 +62,6 @@ class SelogerSpider(scrapy.Spider):
|
||||||
'bedrooms': bedrooms,
|
'bedrooms': bedrooms,
|
||||||
'area': area
|
'area': area
|
||||||
}
|
}
|
||||||
"""
|
|
||||||
pricefloat = float(price.replace(',', '.').replace(' ', '').replace('€', ''))
|
|
||||||
areafloat = float(area.replace(',', '.').replace(' ', '').replace('m²', ''))
|
|
||||||
pricesqmint = round(pricefloat / areafloat)
|
|
||||||
print(url, type, price, rooms, bedrooms, area, pricesqmint)
|
|
||||||
"""
|
|
||||||
active_page = response.url
|
active_page = response.url
|
||||||
active_page_nb = int(active_page.split('LISTING-LISTpg=')[1])
|
active_page_nb = int(active_page.split('LISTING-LISTpg=')[1])
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue