#!/usr/local/bin/perl
#######################################################################
# Copyright 1998 Peter Janett
# This script is written by Peter Janett, scripts@newmediaone.com.
# This program CAN NOT be modified or distributed without the expressed
# written consent of the author.
# If you would like to modify or distribute this program, please email
# us at scripts@newmediaone.com.
#######################################################################
# This script is "Tipware", in that we are accepting tips for its use.
# Tips will help in the development of new scripts, and offset costs of
# supporting existing ones. Please mail your tip to the address below,
# or use your credit card at http://www.newmediaone.com/tips
# Send your tips to:
# New Media One Web Services
# 3389 Hickok Pl.
# Boulder, CO 80301
# Please send comments or suggestions to scripts@newmediaone.com.
# This script works by opening up a standard reference log and printing
# only the lines that show hits from outside your site. It also prints
# them as links, so you can just click to view the sites linking to you.
# This is particularly helpful when the link is provided from a search
# engine, as you can go right to that page and see what keywords were
# used, and what listing you received.
# Please e-mail me at the address above, just to let me know you are
# using this script, to report bugs, or suggestions on how to improve this
# script.
#
# This script comes as is, use it at your own risk.
#
# Obtain permission before redistributing this software, and please leave
# this header in place. Thanks, and enjoy!
########################################################################
#First, we need to define a few basic variables
#This is the server path to the reference log. In most cases, this shouldn't
#need to be changed
$reference_log = "$ENV{DOCUMENT_ROOT}/stats/referer_log";
#If you want links to point to a new window or a frame, define the target here
$target = 'TARGET="_top"';
##############################################################################
#That's it, nothing below here needs to be changed, unless you have more than
#one domain pointing to your site. (If so, jump to the next section)
#Otherwise, upload it in ASCII mode to your site. Make sure it's named
#logger.cgi, and permissions are set to 755. Then, just call it
#through your browser, http://www.yourdomain.com/cgi-local/logger.cgi
##############################################################################
#
#This section ONLY needs to be adjusted if you use more than one domain to
#access your site. You need to add each domain to its own variable below.
$domain2 = 'www21.rapidsite.net/user_id';
$domain3 = 'put_your_second_domain_here.com';
$domain4 = 'another_domain_name_if _you_have_three.org';
########################################################################
############ No configuring should be done beyond this point ###########
########################################################################
#All domain names are compared in lower case
$domain2 = lc($domain2);
$domain3 = lc($domain3);
$domain4 = lc($domain4);
#HTTP_HOST is read from the environment. When it is not set (for example
#when the script is started from a shell) an empty string is used instead.
$domain = lc(defined $ENV{"HTTP_HOST"} ? $ENV{"HTTP_HOST"} : '');
#Base URL of this site, used as the prefix for local pages
$main_url = 'http://' . $domain;
#List of our own domains. Empty names are left out so that an empty string
#can never be compared against a referrer.
@domains = grep { length } ($domain, $domain2, $domain3, $domain4);
# Stop early with an explanatory page when the referrer log is missing or
# empty: -s is only true for a file that exists and has a non-zero size.
unless (-s $reference_log) {
    print "Content-type: text/html\n\n";
    print <<"EEE";
Logger script error
Logger script error
It appears as $reference_log does not exist or is empty. This is the file that
this script uses to collect it information. If you are sure you have a referrer log,
and that it is not empty, you may need to configure this script by hand. If your
logs are cleared once a day, your log may be empty at this time.
This page generated by "Reference Logger".
Written by Peter Janett.
© 1997-98 New Media One Web Services
EEE
    # Nothing has been opened yet at this point, so there is nothing to close.
    exit;
} # End unless (-s $reference_log)
#Every search engine counter starts out at zero
$altavista_num = $excite_num = $hotbot_num = $infoseek_num = $looksmart_num
    = $lycos_num = $nlight_num = $webcrawler_num = $yahoo_num = 0;
#So do the overall totals
$total_hits = $images_num = $non_bookmark_hits = $bookmarks_num
    = $other_page_num = 0;
#Send the CGI header followed by the first line of the page
print "Content-type: text/html\n\n",
      "Reference Summary for $main_url\n";
# Open the referrer log for reading, or report the problem and stop.
# The three-argument form of open (Perl 5.6 or later) keeps characters in
# the path from being interpreted as an open mode.
open(LOGFILE, '<', $reference_log) || error();

# error()
# Prints an explanation of why the referrer log could not be opened,
# including the system error text and the configured path, then ends the
# script. Takes no arguments and never returns.
sub error {
    # Copy the error text first; later I/O could overwrite $!.
    my $reason = "$!";
    print
        "Unable to read referer_log.\n$reason.\n",
        "\nPlease correct the path to your referer_log.\n",
        "\nIt's the variable named \$reference_log \n",
        " and is currently set at\n$reference_log\n",
        "\nOnce this is corrected, all should work fine.\n",
        "\n\n\n";
    exit;
}
#Read the referrer log line by line and sort every external hit into one
#of four groups: images used on other sites, bookmarks or local files,
#search engine hits (with the keywords searched for) and links on other
#sites. Each log line is expected to look like
#    http://referring.site/page.html -> /local/page.html
#
#NOTE(review): the HTML of the generated rows is a reconstruction. The
#link markup is inferred from $target and from the "$main_url$local_page"
#address, both of which were prepared but never printed - confirm it
#against the intended page layout.

#Search engines that are recognised, in the order they are tested.
#Each entry holds: the host name looked for in the referrer, the query
#string marker(s) that come right before the search words, a reference to
#the hit counter and a reference to the list of rows for the summary.
@search_engines = (
    [ 'lycos.com',         [ 'query=' ],                \$lycos_num,      \$lycos_keyword_list ],
    [ 'hotbot.com',        [ 'MT=' ],                   \$hotbot_num,     \$hotbot_keyword_list ],
    [ 'altavista.com',     [ '&q=' ],                   \$altavista_num,  \$altavista_keyword_list ],
    [ 'yahoo.com',         [ 'p=' ],                    \$yahoo_num,      \$yahoo_keyword_list ],
    [ 'excite.com',        [ 'search=' ],               \$excite_num,     \$excite_keyword_list ],
    [ 'looksmart.com',     [ 'key=' ],                  \$looksmart_num,  \$looksmart_keyword_list ],
    [ 'northernlight.com', [ 'qr=' ],                   \$nlight_num,     \$nlight_keyword_list ],
    [ 'webcrawler.com',    [ 'searchText=', 'search=' ], \$webcrawler_num, \$webcrawler_keyword_list ],
    [ 'infoseek.com',      [ 'qt=' ],                   \$infoseek_num,   \$infoseek_keyword_list ],
);

#The list of our own domains does not change while the log is read, so it
#is prepared once instead of once per line.
@own_domains = bare_domains(@domains);

while (my $line = <LOGFILE>) {
    tally_referer_line($line, @own_domains);
} #End while (<LOGFILE>)
close(LOGFILE);

# bare_domains(@names)
# Returns the given domain names in lower case with the first "www."
# removed. Undefined and empty names are left out, so that an empty name
# can never match every referrer.
sub bare_domains {
    my @bare;
    foreach my $name (@_) {
        next unless defined $name;
        my $short = lc $name;
        $short =~ s/www\.//;
        push @bare, $short if length $short;
    }
    return @bare;
}

# escape_html($text)
# Returns $text with & < > " replaced by HTML entities. Referrers come
# from the visitor's browser, so they must not reach the page unescaped.
sub escape_html {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# extract_keywords($ref_site, @markers)
# Returns the search words found in a search engine referrer: everything
# from "http" up to and including the marker is removed, the rest is cut
# at the first "&", and "+" and %XX escapes are decoded. The first marker
# that occurs in the referrer is used; when none occurs the last one is
# tried, which leaves the referrer unchanged apart from the "&" cut.
sub extract_keywords {
    my ($ref_site, @markers) = @_;
    my ($marker) = grep { index($ref_site, $_) >= 0 } @markers;
    $marker = $markers[-1] unless defined $marker;

    my $keywords = $ref_site;
    $keywords =~ s/http.*?\Q$marker\E//;
    $keywords =~ s/&.*//;
    $keywords =~ tr/+/ /;
    $keywords =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    return $keywords;
}

# referer_row($ref_site, $label, $local_page)
# Returns one line of HTML of the form "label -> local page". The label
# links to the referrer only when the referrer starts with http:// or
# https://; the local page always links to $main_url plus the page.
sub referer_row {
    my ($ref_site, $label, $local_page) = @_;

    my $from = escape_html($label);
    if ($ref_site =~ m{^https?://}i) {
        $from = '<A HREF="' . escape_html($ref_site) . qq|" $target>$from</A>|;
    }
    my $to = '<A HREF="' . escape_html("$main_url$local_page")
           . qq|" $target>| . escape_html($local_page) . '</A>';

    return "$from -&gt; $to<BR>\n";
}

# tally_referer_line($line, @own_domains)
# Classifies one line of the referrer log and updates the matching
# counter and row list ($images, $bookmarks, $html_pages or one of the
# search engine lists). Lines without " -> " and hits that came from one
# of @own_domains are ignored. Returns nothing.
sub tally_referer_line {
    my ($line, @own_domains) = @_;

    $line =~ s/[\r\n]+$//;
    my ($ref_site, $local_page) = split / -> /, $line;
    return unless defined $local_page;

    #Hits from our own pages are not external. The comparison is a plain
    #substring test, so dots in a domain name only match dots.
    my $lc_ref_site = lc $ref_site;
    foreach my $own (@own_domains) {
        return if index($lc_ref_site, $own) >= 0;
    }

    #An image is recognised by the extension of the requested file
    (my $path = $local_page) =~ s/[?#].*//;
    if ($path =~ /\.(?:gif|jpe?g)$/i) {
        $images .= referer_row($ref_site, $ref_site, $local_page);
        $images_num++;
        return;
    }

    #No http: or https: in the referrer means a bookmark or a local file
    if ($ref_site !~ /https?:/) {
        $bookmarks .= referer_row($ref_site, $ref_site, $local_page);
        $bookmarks_num++;
        return;
    }

    #Search engine hits are listed by the keywords that were used
    foreach my $engine (@search_engines) {
        my ($host, $markers, $count_ref, $rows_ref) = @$engine;
        next unless index($lc_ref_site, $host) >= 0;
        $$count_ref++;
        $$rows_ref .= referer_row($ref_site,
                                  extract_keywords($ref_site, @$markers),
                                  $local_page);
        return;
    }

    #Everything else is a link on some other site
    $html_pages .= referer_row($ref_site, $ref_site, $local_page);
    $other_page_num++;
    return;
}
#Add up the external hits, starting with the search engines and widening
#the group one step at a time
$search_engine_hits = 0;
foreach my $engine_hits ($altavista_num, $excite_num, $hotbot_num,
                         $infoseek_num, $looksmart_num, $lycos_num,
                         $nlight_num, $webcrawler_num, $yahoo_num) {
    $search_engine_hits += $engine_hits;
}
#Search engines plus links on other sites: no images, no bookmarks
$non_bookmark_hits = $search_engine_hits + $other_page_num;
#Add the bookmarks to get every hit that was not an image
$html_hits = $non_bookmark_hits + $bookmarks_num;
#Add the images to get the grand total
$total_hits = $html_hits + $images_num;
print<<"EEE";
Summary of external hits for $main_url.
All keywords, URLs, and local pages are linked, so you can see
exactly where your traffic is coming from.
Total number of external hits =
$total_hits
Of these $total_hits,
$images_num were images,
and $bookmarks_num
were bookmarks or local files.
This leaves $non_bookmark_hits hits
that were not images, local files or bookmarks.
$search_engine_hits came from the
top 9 search engines, as listed below.
$altavista_num hits came from Alta Vista
|
Keywords used
|
Local page
|
$altavista_keyword_list
|
$excite_num hits came from Excite
|
Keywords used
|
Local page
|
$excite_keyword_list
|
$hotbot_num hits came from HotBot
|
Keywords used
|
Local page
|
$hotbot_keyword_list
|
$infoseek_num hits came from Infoseek
|
Keywords used
|
Local page
|
$infoseek_keyword_list
|
$looksmart_num hits came from Looksmart
|
Keywords used
|
Local page
|
$looksmart_keyword_list
|
$lycos_num hits came from Lycos
|
Keywords used
|
Local page
|
$lycos_keyword_list
|
$nlight_num hits came from Northern Light
|
Keywords used
|
Local page
|
$nlight_keyword_list
|
$webcrawler_num hits came from Webcrawler
|
Keywords used
|
Local page
|
$webcrawler_keyword_list
|
$yahoo_num hits came from Yahoo
|
Keywords used
|
Local page
|
$yahoo_keyword_list
|
$other_page_num Hits came from links on other sites
|
$html_pages
$images_num Images were used in other sites
|
$images
$bookmarks_num Hits came from local files or bookmarks
|
$bookmarks
Script written by Peter Janett
The Freelance Web Designer.
© 1997-98 Peter Janett