Get all crawl errors from Google Webmaster Tools

The Google Webmaster Tools web interface only lets you download the top crawl errors.

If you want to get all crawl errors, this article is very useful.

First, install Python and the gdata library if they are not already installed.
$ sudo apt-get install python
$ sudo apt-get install python-pip
$ pip install gdata

Then save the script below to a file (e.g. get_crawl_errors.py) and run it:
$ python get_crawl_errors.py

Script content:
import csv
import getpass

try:
    from xml.etree import ElementTree as ET
except ImportError:
    # Older interpreters without xml.etree: fall back to the standalone
    # elementtree package, bound to the same ET alias the script uses.
    from elementtree import ElementTree as ET

import atom
import gdata.service
import gdata.webmastertools.service

# Interactive setup: read the account credentials and the site to query, then
# build the feed URL and the API client used by the functions below.
username = raw_input("Username: ")
password = getpass.getpass()
# The site is embedded in the feed URL, so its dots, scheme and slashes are
# percent-encoded by hand here.
domain = raw_input("Domain (e.g. www.example.com): ").replace('.', '%2E')
if domain.endswith('/'):
    domain = domain[:-1]
domain = 'http%3A%2F%2F' + domain + '%2F'
# NOTE(review): the feed URL was blank in this copy of the script (formatting
# an empty string with an argument raises TypeError). Restored to the GData
# Webmaster Tools crawl-issues feed -- confirm against the API reference.
url = 'https://www.google.com/webmasters/tools/feeds/%s/crawlissues/' % domain
# NOTE(review): `username` was collected but never passed to the client;
# `email=` is the constructor keyword for the account name -- confirm.
client = gdata.webmastertools.service.GWebmasterToolsService(
    email=username,
    password=password,
    source='QueryClickWebmasterToolsPythonExample')

def main():
   print 'Logging in'
   print 'Retrieving crawl errors'
   start_index = 1
   total_results, entries = get_errors(start_index)
   f = open('crawl_errors.csv', 'w')
   save_results_csv(entries, f)
   remainder = total_results - 100
   progress =  'Fetching %s - %s of %s'  % (start_index, start_index + 99, total_results)
   print progress
   while remainder > 0:
       start_index += 100
       total_results, entries = get_errors(start_index)
       progress =  'Fetching %s - %s of %s'  % (start_index, start_index + 99, total_results)
       print progress
       save_results_csv(entries, f)
       remainder -= 100

def get_errors(start_index):
    """Fetch one page (up to 100 entries) of the crawl-errors feed.

    Args:
        start_index: 1-based index of the first entry to request.

    Returns:
        A (total_results, entries) tuple: the integer parsed from the feed's
        totalResults element, and the list of entry elements on this page.

    Raises:
        ValueError: if the response has no totalResults element.
    """
    req_url = '%s?start-index=%s&max-results=100' % (url, start_index)
    res_stream = client.request('GET', req_url)
    result = res_stream.read()
    root = ET.fromstring(result)

    # The namespace URIs of the two tags were lost from this copy of the
    # script (only '{}' remained), so direct children of the root are matched
    # on their local name instead of guessing the URIs.
    total_text = None
    entries = []
    for child in root:
        local_name = child.tag.rsplit('}', 1)[-1]
        if local_name == 'totalResults' and total_text is None:
            total_text = child.text
        elif local_name == 'entry':
            entries.append(child)

    if total_text is None:
        raise ValueError('feed response has no totalResults element')
    return int(total_text), entries

def save_results_csv(entries, file):
    """Write one CSV row per feed entry to an already-open file.

    Each row holds the text of the entry's child elements at positions 5
    through 9 (presumably the crawl-issue detail fields -- verify against the
    feed schema). Fields containing commas, quotes or newlines are quoted by
    the csv module, and an element with no text becomes an empty field.

    Args:
        entries: iterable of ElementTree elements with at least 10 children.
        file: writable file-like object; rows are terminated with '\\n'.

    Raises:
        IndexError: if an entry has fewer than 10 child elements.
    """
    writer = csv.writer(file, lineterminator='\n')
    for entry in entries:
        writer.writerow([entry[position].text for position in range(5, 10)])
# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

No comments:

Post a Comment