# Source code for goldendoodle.spiders.items
# Define here the models for your scraped items
#
# See documentation in:
# <https://docs.scrapy.org/en/latest/topics/items.html>
import sys
from datetime import datetime
import scrapy
class GoldendoodleItem(scrapy.Item):
    """
    Scrapy item declaring the output fields of the spider.

    **Definition of the output:**
    """

    # Each bare string below is an attribute docstring for the field declared
    # directly above it (the convention the original file already uses so that
    # the documentation generator can pick the descriptions up).

    currentURL = scrapy.Field()
    """
    currentURL
        shows the response.url of the actual entry.
    """

    currentHeaders = scrapy.Field()
    """
    currentHeaders
        shows the response.headers of currentURL.
    """

    regex_finding = scrapy.Field()
    """
    regex_finding
        shows the first occurrence of the search string in the whole text of
        currentURL.

        - example:
          <re.Match object; span=(251, 256), match='latex'> ==> LaTeX
    """

    regex_findingElements = scrapy.Field()
    """
    regex_findingElements
        shows the XML elements containing the search string.
    """

    regex_findingElementsQuery = scrapy.Field()
    """
    regex_findingElementsQuery
        describes the XPATH to the XML element with the search string.
    """

    # NOTE(review): the six fields below carry no description in the original
    # file. Judging by their names only, they presumably mirror the three
    # regex_* fields for content obtained through a webdriver, with the
    # cleared_* variants holding a post-processed form -- confirm against the
    # spider that populates them before documenting them as fact.
    webdriver_finding = scrapy.Field()
    webdriver_findingElements = scrapy.Field()
    webdriver_findingElementsQuery = scrapy.Field()

    cleared_webdriver_finding = scrapy.Field()
    cleared_webdriver_findingElements = scrapy.Field()
    cleared_webdriver_findingElementsQuery = scrapy.Field()