Poor Man's Backlink Checker

Discussion in 'Link Development' started by directorycollector, Oct 14, 2006.

  1. #1
    In another thread I said I was writing a backlink checker in JScript running in WSH. It's kinda slow because it only uses a single thread; I will probably add multi-threading in the next version. But to show how scripts can help automate a webmaster's daily tasks, here you go:

    
    // JScript.
    
    /*****************************************************************
     * Poor Man's Backlink Checker.
     *
     * Author: George Asprey
     *
     * Owner of: http://directory.proud-collector.com/ -- Free directory
     *      and: http://www.proud-collector.com/
     *
     * This script requires Internet Explorer, and was tested
     * on WSH 5.6 - XP Media Center with IE 7.0
     *****************************************************************/
    
    /*
     * Queue theURL for spidering if it is a new page on theDomain.
     *
     * theURL - absolute URL taken from a link's href.
     *
     * Reads the global theDomain and appends to the global theURLs array.
     * Returns nothing.
     */
    function addURL(theURL) {
    	// Links that differ only by their #fragment point at the same page,
    	// so compare and store the URL without it.
    	var pageURL = theURL;
    	var hashAt = pageURL.indexOf("#");
    	if (hashAt != -1)
    		pageURL = pageURL.substring(0, hashAt);
    
    	// Only http(s) URLs can be spidered; this also rejects mailto: and
    	// javascript: links that merely mention the domain.
    	var scheme = /^https?:\/\//i.exec(pageURL);
    	if (!scheme)
    		return;
    
    	// theDomain must sit directly after the scheme and end at a URL
    	// boundary. A plain substring search would also accept external URLs
    	// that carry the domain in a query string or as part of a longer host.
    	var rest = pageURL.substring(scheme[0].length).toLowerCase();
    	var domain = theDomain.toLowerCase();
    	if (rest.indexOf(domain) != 0)
    		return;
    	var boundary = rest.charAt(domain.length);
    	if (boundary != "" && "/?:".indexOf(boundary) == -1)
    		return;
    
    	// make sure the page wasn't already queued or spidered
    	for (var i = 0; i < theURLs.length; i++)
    		if (pageURL == theURLs[i])
    			return;
    
    	// pageURL is a new internal link
    	theURLs[theURLs.length] = pageURL;
    }
    
    /*
     * Load theURL in the shared IE instance and scan the page's links.
     *
     * theURL - absolute URL of the page to load.
     *
     * Every link that does not contain theSearchDomain is handed to addURL().
     * As soon as a link containing theSearchDomain is seen, the global
     * theSearchDomainFound is set to true and scanning stops.
     * Returns nothing.
     */
    function parsePage(theURL) {
    	IE.Navigate(theURL);
    	// Right after Navigate the browser may still report the previous page
    	// as complete, so also wait while it says it is busy.
    	while (IE.Busy || IE.ReadyState < 4) WScript.Sleep(10);
    
    	// local on purpose: a bare assignment would create a global variable
    	var theDoc = IE.document;
    	while (theDoc.readyState != "complete") WScript.Sleep(10);
    
    	var pageLinks = theDoc.links;
    	for (var i = 0; i < pageLinks.length; i++) {
    		var href = pageLinks[i].href;
    
    		if (href.indexOf(theSearchDomain) != -1) {
    			// found the backlink we are looking for, no need to go on
    			theSearchDomainFound = true;
    			return;
    		}
    
    		// this link is not the one we are
    		// looking for so add it to the list
    		addURL(href);
    	}
    }
    
    /*****************************************************************
     * Main code for Poor Man's Backlink Checker.
     * This script uses only one thread to check a single
     * domain for a single backlink.
     * Future improvement (Not So Poor Man's Backlink Checker)
     * will utilize multiple threads (or at least I hope so).
     *****************************************************************/
    
    // One visible Internet Explorer window is reused for every page load.
    var IE = WScript.CreateObject("internetexplorer.application");
    IE.top = 0;
    IE.left = 0;
    IE.width = 800;
    IE.height = 570;
    IE.visible = true;
    
    // Crawl state shared with addURL() and parsePage().
    var theSearchDomainFound = false;	// set by parsePage() on a hit
    var theURLs = [];			// queue of internal URLs, grown by addURL()
    var theURLsIndex = 0;		// next entry of theURLs to load
    var maxURLCount = 2000;		// upper bound on the number of pages loaded
    
    // this is the domain where your link should be
    var theDomain = "www.backlinkdomain.com";
    // this is your domain (what we are searching for)
    var theSearchDomain = "www.mydomain.com";
    
    // prime the array with the first URL
    theURLs[0] = "http://"+theDomain+"/";
    
    // start spidering the domain
    try {
    	do {
    		parsePage(theURLs[theURLsIndex++]);
    	} while (theURLsIndex < theURLs.length && theURLsIndex < maxURLCount && theSearchDomainFound == false);
    } finally {
    	// close the browser even if a page load throws, so no
    	// orphaned IE window is left behind
    	IE.Quit();
    }
    
    if (theSearchDomainFound)
    	WScript.Echo("The domain: "+theSearchDomain+" was found.");
    else
    	WScript.Echo("The domain: "+theSearchDomain+" was not found.");
    
    WScript.Quit();
    
    Code (markup):
     
    directorycollector, Oct 14, 2006 IP
    poseidon likes this.
  2. dmoore

    dmoore Peon

    Messages:
    26
    Likes Received:
    0
    Best Answers:
    0
    Trophy Points:
    0
    #2
    Thanks for sharing!! I am really interested in the code when you get the multi-threading working.

    Derek
     
    dmoore, Oct 15, 2006 IP