diff --git a/ImmoScrap/spiders/Leboncoin.py b/ImmoScrap/spiders/Leboncoin.py
deleted file mode 100644
index dbc3d8e..0000000
--- a/ImmoScrap/spiders/Leboncoin.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import scrapy
-
-
-filters = {"category": {"id": "9"},
-           "enums": {"real_estate_type": ["1", "2", "5"], "ad_type": ["offer"]},
-           "ranges": {"rooms": {}, "square": {}, "price": {"min": 0, "max": 300000}},
-           "location": {"area": {"lat": 45.521971, "lng": 4.869926, "radius": 1000}, "city_zipcodes": [], "departments": [], "disable_region": False, "locations": [], "regions": []},
-           "keywords": {"type": "all"},
-           "owner": {}}
-
-data = {"pivot": "0,0,0", "limit": 100, "limit_alu": 1,
-        "filters": filters,
-        "sort_by": "time", "sort_order": "desc"}
-
-headers = {"User-Agent": "LBC;Android;6.0;Android SDK built for x86;phone;616a1ca77ca70180;wwan;4.30.4.0;70400;3",
-           "api_key": "ba0c2dad52b3ec", "Content-Type": "application/json; charset=UTF-8", "Accept-Encoding": "gzip, deflate"}
-
-
-
-class LeboncoinSpider(scrapy.Spider):
-    name = "leboncoin"
-
-    def start_requests(self):
-        urls = [
-            'https://api.leboncoin.fr/api/adfinder/v1/search',
-            'http://quotes.toscrape.com/page/2/',
-        ]
-        for url in urls:
-            yield scrapy.Request(url=url, callback=self.parse)
-
-    def parse(self, response):
-        page = response.url.split("/")[-2]
-        filename = 'quotes-%s.html' % page
-        with open(filename, 'wb') as f:
-            f.write(response.body)
-        self.log('Saved file %s' % filename)
diff --git a/ImmoScrap/spiders/leboncoin.py b/ImmoScrap/spiders/leboncoin.py
new file mode 100644
index 0000000..9a02827
--- /dev/null
+++ b/ImmoScrap/spiders/leboncoin.py
@@ -0,0 +1,57 @@
+import scrapy
+
+
+class LeboncoinSpider(scrapy.Spider):
+    name = "leboncoin"
+
+    apiurl = 'https://api.leboncoin.fr/api/adfinder/v1/search'
+    filters = {
+        "category": {"id": "9"},  # 9: buy
+        "enums": {
+            "real_estate_type": ["1", "2", "5"],  # 1: houses, 2: apartments, 5: others
+            "ad_type": ["offer"],
+            "immo_sell_type": ["old"]
+        },
+        "ranges": {"price": {"max": 300000}},
+        "location": {
+            "area": {"lat": 45.521971, "lng": 4.869926, "radius": 1000},
+            "city_zipcodes": [],
+            "departments": [],
+            "disable_region": False,
+            "locations": [],
+            "regions": []
+        },
+        "keywords": {}
+    }
+
+    data = {
+        "pivot": "0,0,0",  # page cursor
+        "limit": 100,  # number of results per page (100 is the server-side maximum)
+        "limit_alu": 1,  # 0 to return only statistics, 1 to also return listings
+        "offset": 0,
+        "filters": filters,
+        "sort_by": "time",
+        "sort_order": "desc"
+    }
+
+    headers = {
+        "User-Agent": "LBC;Android;6.0;Android SDK built for x86;phone;616a1ca77ca70180;wwan;4.30.4.0;70400;3",
+        "Accept-Encoding": "gzip, deflate",
+        "Accept-Language": "en-US,en;q=0.8,fr;q=0.6",
+        "Referer": "https://www.leboncoin.fr/recherche",
+        "Origin": "https://www.leboncoin.fr"
+    }
+
+    def start_requests(self):
+        yield scrapy.http.JsonRequest(url=self.apiurl, headers=self.headers, data=self.data, callback=self.parse)
+
+    def parse(self, response):
+        payload = response.json()  # named "payload" to avoid shadowing the stdlib json module
+        for ad in payload["ads"]:
+            yield ad  # emit each raw ad dict as an item
+
+        total_ads_nb = payload["total"]
+        next_offset = self.data["offset"] + self.data["limit"]
+        if next_offset < total_ads_nb:  # more pages remain
+            self.data["offset"] = next_offset
+            yield scrapy.http.JsonRequest(self.apiurl, headers=self.headers, data=self.data, callback=self.parse)
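
Usage note (not part of the diff): a minimal sketch of running the new spider from a script and exporting the raw ads it yields. It assumes Scrapy >= 2.0 for the FEEDS setting; the "ads.jl" output path is an illustrative choice, not something this diff defines.

    from scrapy.crawler import CrawlerProcess

    from ImmoScrap.spiders.leboncoin import LeboncoinSpider

    process = CrawlerProcess(settings={
        # Write every yielded ad as one JSON object per line; "ads.jl" is illustrative.
        "FEEDS": {"ads.jl": {"format": "jsonlines"}},
    })
    process.crawl(LeboncoinSpider)
    process.start()  # blocks until the crawl finishes

The same result comes from the standard CLI inside the project directory: scrapy crawl leboncoin -o ads.jl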