Leboncoin API search working, still need to parse the results

Jordan ERNST 2020-07-03 12:59:24 +02:00
parent 42ad03f6e4
commit 2e52581737
2 changed files with 59 additions and 36 deletions


@@ -1,36 +0,0 @@
import scrapy

filters = {"category": {"id": "9"},
           "enums": {"real_estate_type": ["1", "2", "5"], "ad_type": ["offer"]},
           "ranges": {"rooms": {}, "square": {}, "price": {"min": 0, "max": 300000}},
           "location": {"area": {"lat": 45.521971, "lng": 4.869926, "radius": 1000}, "city_zipcodes": [], "departments": [], "disable_region": False, "locations": [], "regions": []},
           "keywords": {"type": "all"},
           "owner": {}}

data = {"pivot": "0,0,0", "limit": 100, "limit_alu": 1,
        "filters": filters,
        "sort_by": "time", "sort_order": "desc"}

headers = {"User-Agent": "LBC;Android;6.0;Android SDK built for x86;phone;616a1ca77ca70180;wwan;4.30.4.0;70400;3",
           "api_key": "ba0c2dad52b3ec", "Content-Type": "application/json; charset=UTF-8", "Accept-Encoding": "gzip, deflate"}


class LeboncoinSpider(scrapy.Spider):
    name = "leboncoin"

    def start_requests(self):
        urls = [
            'https://api.leboncoin.fr/api/adfinder/v1/search',
            'http://quotes.toscrape.com/page/2/',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        page = response.url.split("/")[-2]
        filename = 'quotes-%s.html' % page
        with open(filename, 'wb') as f:
            f.write(response.body)
        self.log('Saved file %s' % filename)


@@ -0,0 +1,59 @@
import scrapy


class LeboncoinSpider(scrapy.Spider):
    name = "leboncoin"

    apiurl = 'https://api.leboncoin.fr/api/adfinder/v1/search'

    filters = {
        "category": {"id": "9"},  # 9: buy
        "enums": {
            "real_estate_type": ["1", "2", "5"],  # 1: houses, 2: apartments, 5: others
            "ad_type": ["offer"],
            "immo_sell_type": ["old"]
        },
        "ranges": {"price": {"max": 300000}},
        "location": {
            "area": {"lat": 45.521971, "lng": 4.869926, "radius": 1000},
            "city_zipcodes": [],
            "departments": [],
            "disable_region": False,
            "locations": [],
            "regions": []
        },
        "keywords": {}
    }

    data = {
        "pivot": "0,0,0",  # page cursor
        "limit": 100,  # number of results per page (100 is the server-side max)
        "limit_alu": 1,  # 0 to return only statistics, 1 to also return listings
        "offset": 0,
        "filters": filters,
        "sort_by": "time",
        "sort_order": "desc"
    }

    headers = {
        "User-Agent": "LBC;Android;6.0;Android SDK built for x86;phone;616a1ca77ca70180;wwan;4.30.4.0;70400;3",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.8,fr;q=0.6",
        "Referer": "https://www.leboncoin.fr/recherche",
        "Origin": "https://www.leboncoin.fr"
    }

    def start_requests(self):
        yield scrapy.http.JsonRequest(url=self.apiurl, headers=self.headers,
                                      data=self.data, callback=self.parse)

    def parse(self, response):
        payload = response.json()
        ads = payload["ads"]
        for ad in ads:
            pass  # TODO: extract the fields of interest from each ad

        # Paginate: each response schedules at most one follow-up request,
        # so only one request is in flight and mutating the shared offset is safe.
        total_ads_nb = payload["total"]
        next_offset = self.data["offset"] + self.data["limit"]
        if next_offset < total_ads_nb:  # there is a next page
            self.data["offset"] = next_offset
            yield scrapy.http.JsonRequest(self.apiurl, headers=self.headers,
                                          data=self.data, callback=self.parse)
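
The loop over ads above is still a stub, as the commit message says. A minimal sketch of what the per-ad extraction could grow into is shown below; the field names (list_id, subject, url, price, location) are assumptions about the adfinder response schema, not something this commit confirms.

# Hypothetical next step for the stubbed loop above -- not part of this commit.
# Flatten one ad from the adfinder JSON into a plain dict; every field name
# here is an assumption about the response schema.
def extract_ad(ad):
    location = ad.get("location", {})
    return {
        "list_id": ad.get("list_id"),             # assumed: numeric ad id
        "subject": ad.get("subject"),             # assumed: listing title
        "url": ad.get("url"),                     # assumed: public listing URL
        "price": (ad.get("price") or [None])[0],  # assumed: price wrapped in a list
        "city": location.get("city"),
        "zipcode": location.get("zipcode"),
    }


if __name__ == "__main__":
    # Toy payload, invented only to exercise the helper.
    sample = {"list_id": 1, "subject": "Maison 4 pieces",
              "url": "https://www.leboncoin.fr/ventes_immobilieres/1.htm",
              "price": [250000],
              "location": {"city": "Vienne", "zipcode": "38200"}}
    print(extract_ad(sample))

Inside the spider, the stubbed loop would then become `yield extract_ad(ad)`, letting Scrapy's item pipeline handle the resulting dicts.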