Why does Scrapy's memory consumption keep growing?

prefs() shows that most of the memory is occupied by Selector() and Response() objects.
The process accumulates 10-12 GB over a few hours.
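(For context: prefs() is the shortcut available in Scrapy's telnet console and is backed by scrapy.utils.trackref. Roughly the same counters can be dumped from code; a minimal sketch, where dump_live_refs is a hypothetical helper and not part of my spider:)

# Sketch: inspect the same live-object counters that prefs() prints.
from scrapy.utils.trackref import print_live_refs, get_oldest

def dump_live_refs():
    # Counts of live tracked objects (Request, Response, Selector, ...)
    # plus the age of the oldest instance of each class.
    print_live_refs()

    # The oldest response still referenced hints at what is keeping memory alive.
    oldest = get_oldest('HtmlResponse')
    if oldest is not None:
        print('oldest live response:', oldest.url)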
I work with Scrapy like this (base spider):
from collections import deque
from datetime import datetime
from typing import Any, Callable, Optional

from scrapy import Request, Selector
from scrapy.http import Response

# BaseSpider, BaseAcrossSearchMixin, the loaders and the *_MODEL dicts
# are defined elsewhere in the project.

class BaseAcrossSearchSpider(BaseAcrossSearchMixin, BaseSpider):
    ITEMS_OBJECTS: str = ""
    ITEM_URL_OBJECT: str = ""
    NEXT_BUTTON_OBJECT: str = ""
    CONTINUE_IF_NEXT_BUTTON_OBJECT_IS: bool = True

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._last_page: int = self.START_PAGE

    def start_requests(self) -> None:
        yield Request(self._search_url(self.START_PAGE), callback=self.parse)

    def parse(self, response: Response) -> Any:
        for short_item in response.css(self.ITEMS_OBJECTS):
            yield Request(
                self._page_url(short_item.css(self.ITEM_URL_OBJECT).extract_first()),
                self._process_item(short_item)
            )

        if self.CONTINUE_IF_NEXT_BUTTON_OBJECT_IS is bool(response.css(self.NEXT_BUTTON_OBJECT)):
            yield Request(self._search_url(self._last_page + 1), self.parse)

    def _process_item(self, short_item: Selector) -> Callable:
        def wrapper(response: Response):
            """
            A downloader middleware checks (via Redis) whether the spider has
            already visited this URL. If it has, it returns an empty Response().
            """
            if response.body:
                return self._parse(self.FULL_MODEL,
                                   self.full_loader,
                                   response=response,
                                   url=response.url,
                                   utc_created_at=datetime.utcnow(),
                                   utc_actually_at=datetime.utcnow())
            else:
                return self._parse(self.SHORT_MODEL,
                                   self.short_loader,
                                   selector=short_item,
                                   url=response.url,
                                   utc_actually_at=datetime.utcnow())
        return wrapper

    def _parse(self,
               model: dict,
               loader,
               selector: Selector = None,
               response: Response = None,
               **kwargs):

        if not selector and response:
            selector = response.selector

        loader = loader(item=self.item(), selector=selector)

        for element, handler in model.items():
            if callable(handler):
                # A tuple of field names paired with the values the handler extracts.
                deque(map(loader.add_value, element, handler(selector)))
            else:
                loader.add_css(element, handler)

        for k, v in kwargs.items():
            loader.add_value(k, v)

        return loader.load_item()

    def _search_url(self, page: Optional[int]) -> str:
        ...

(child spider)
class ChildaSpider(ChildaMixin, BaseAcrossSearchSpider):
    SHORT_MODEL = {
        ('price_base', 'price_total'): _get_prices,
    }

    FULL_MODEL = {
        'price_base': 'p.basePrice__price span::text',
        'price_total': 'p.totalPrice__price span::text',
        # ...
        ('body_type', 'color', 'vin', 'engine_size', 'engine_type', 'drive_type',
         'steering_location', 'transmission', 'passengers_count', 'doors_count'):
            _get_elements_from_table(range(1, 11), 2),
    }

    ITEMS_OBJECTS = 'div.casetMain'
    ITEM_URL_OBJECT = 'a::attr("href")'
    NEXT_BUTTON_OBJECT = 'button.btnFunc pager__btn__next[disabled]'
    CONTINUE_IF_NEXT_BUTTON_OBJECT_IS = False


Am I doing something wrong somewhere?
March 23rd 20 at 18:42
1 answer
March 23rd 20 at 18:44
Solution
In general, as I understand it, my memory is not actually leaking.

The problem was the algorithm of the parse() method and the settings I was crawling with. My concurrency values for requests and threads were too high, and DEPTH_PRIORITY was left at its default (0).

As a result, search pages were parsed faster than items could be generated from their records, so a long request queue built up and memory overflowed. Setting DEPTH_PRIORITY = 1 helped.
However, crawling speed unfortunately dropped.
Starting two spiders in two separate processes improved the situation somewhat.
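For reference, the change boils down to a few lines in settings.py; a minimal sketch (the queue classes follow the breadth-first recommendation from the Scrapy docs, and the concurrency numbers are illustrative, not the exact values from this project):

# settings.py sketch: favour breadth-first crawling and keep concurrency modest,
# so detail pages are consumed about as fast as listing pages are discovered.
DEPTH_PRIORITY = 1

# With a positive DEPTH_PRIORITY, FIFO queues make shallower requests
# actually get processed first (per the Scrapy docs on BFO crawling).
SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleFifoDiskQueue'
SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.FifoMemoryQueue'

CONCURRENT_REQUESTS = 16
CONCURRENT_REQUESTS_PER_DOMAIN = 8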
