Google Webmaster Tools (https://www.google.com/webmasters/tools/) only lets you download the top crawl errors.
If you want to retrieve all crawl errors, the script in this article will help.
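First, install Python and the gdata library if they are not already installed. (The script below uses Python 2 syntax such as raw_input, so it needs a Python 2 interpreter.)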
$ sudo apt-get install python
$ sudo apt-get install python-pip
$ pip install gdata
Then run the script:
$ python gwt_crawlerrors.py
Contents of gwt_crawlerrors.py:
import gdata.webmastertools.service
import gdata.service
# Import ElementTree under the alias ``ET``, which is the name the rest of
# this script uses to parse the feed XML.
try:
    from xml.etree import ElementTree as ET
except ImportError:
    # Fallback for interpreters without xml.etree: use the standalone
    # ``elementtree`` package. It must be bound to the same ``ET`` alias,
    # otherwise the later ``ET.fromstring`` call raises NameError.
    from elementtree import ElementTree as ET
import atom
import getpass
# --- Interactive setup: credentials and the site whose feed is queried ---
username = raw_input("Username: ")
password = getpass.getpass()

# The feed URL embeds the site as a percent-encoded "http://<domain>/" string.
# Dots are encoded by hand, and a single trailing slash typed by the user is
# dropped so that exactly one encoded slash ends the identifier.
domain = raw_input("Domain(e.g. uk.queryclick.com): ")
domain = domain.replace('.', '%2E')
domain = domain[:-1] if domain.endswith('/') else domain
domain = ''.join(('http%3A%2F%2F', domain, '%2F'))

# Base URL of the crawl-issues feed; get_errors appends paging parameters.
url = 'https://www.google.com/webmasters/tools/feeds/%s/crawlissues/' % domain

# Shared service client; main() performs the actual login.
client = gdata.webmastertools.service.GWebmasterToolsService(
    email=username,
    password=password,
    source='QueryClickWebmasterToolsPythonExample',
)
def main():
    """Log in, page through the crawl-issues feed and save it as CSV.

    Logs in with the module-level ``client``, fetches the first page with
    ``get_errors`` and then keeps requesting further pages of 100 results
    until the total reported by the first response has been covered. Every
    page is written to ``crawl_errors.csv`` through ``save_results_csv``,
    and a progress line is printed for each page.

    The first page is fetched before the output file is opened, so a failed
    login or first request does not create or truncate ``crawl_errors.csv``.
    """
    # Must stay equal to the max-results value that get_errors requests.
    page_size = 100

    print('Logging in')
    client.ProgrammaticLogin()
    print('Retrieving crawl errors')

    start_index = 1
    total_results, entries = get_errors(start_index)

    # ``with`` guarantees the file is closed (and what was written so far is
    # flushed) even if a later fetch or parse raises.
    with open('crawl_errors.csv', 'w') as f:
        save_results_csv(entries, f)
        print('Fetching %s - %s of %s'
              % (start_index, start_index + page_size - 1, total_results))

        # The loop bound is derived from the total reported by the first
        # response; later responses only update the progress message.
        remainder = total_results - page_size
        while remainder > 0:
            start_index += page_size
            total_results, entries = get_errors(start_index)
            print('Fetching %s - %s of %s'
                  % (start_index, start_index + page_size - 1, total_results))
            save_results_csv(entries, f)
            remainder -= page_size
def get_errors(start_index):
    """Fetch one page of the crawl-issues feed.

    Requests up to 100 results starting at ``start_index`` from the
    module-level ``url`` using the module-level ``client``, and parses the
    response body as XML.

    Returns a ``(total_results, entries)`` tuple: the integer value of the
    feed's OpenSearch ``totalResults`` element and the list of Atom
    ``entry`` elements found under the feed root.

    Raises IndexError if the response has no ``totalResults`` element.
    """
    total_tag = '{http://a9.com/-/spec/opensearchrss/1.0/}totalResults'
    entry_tag = '{http://www.w3.org/2005/Atom}entry'

    page_url = '%s?start-index=%s&max-results=100' % (url, start_index)
    response = client.request('GET', page_url)
    feed = ET.fromstring(response.read())

    total_node = feed.findall(total_tag)[0]
    return int(total_node.text), feed.findall(entry_tag)
def save_results_csv(entries, file):
    """Write one CSV row per feed entry to an already-open file.

    For each entry, the text of its child elements at positions 5 through 9
    becomes one five-column row. (Presumably these are the crawl-issue
    detail fields of the feed entry; verify against the feed schema.)

    Rows are produced by ``csv.writer`` so that fields containing commas,
    double quotes or newlines are quoted instead of corrupting the row.
    Child elements with no text are written as empty fields. Lines end with
    a single ``\\n``.

    Args:
        entries: iterable of ElementTree elements, each with at least ten
            children.
        file: writable text file object; it is not closed here.

    Raises:
        IndexError: if an entry has fewer than ten children.
    """
    import csv  # local import keeps this function self-contained

    writer = csv.writer(file, lineterminator='\n')
    for entry in entries:
        writer.writerow([entry[position].text for position in range(5, 10)])
# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
No comments:
Post a Comment