authorize bots to read sitemap
[wikifarm.git] / install / cookbook / sitemap.php
CommitLineData
5c773b69
PC
1<?php if (!defined('PmWiki')) exit();
2/*
3 $Id: sitemap.php,v 1.7 2005/12/29 10:26:50 pts00065 Exp $
4This file is NOT part of PmWiki; still you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published
6 by the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9
10This script generates a Google sitemap, either automatically, via action=sitemap, or both.
11
12The automatic version creates a .gz file and is more efficient than the action version.
13
14Google requires that the generated file be located in the root of your PmWiki installation, typically the same place as your pmwiki.php or equivalent file. Thus:
15
16http://www.brambring.nl/pmwiki.php -> sitemap must be in: http://www.brambring.nl/sitemap.xml.gz
17
18place the file in the cookbook or local directory
19and include the file from your farmconfig.php or config.php
20include_once("$FarmD/cookbook/sitemap.php");
21
22
23Then add the URL to your sitemap in the google webform or ping google
24
25automatic google ping is not implemented yet.
26
27regards
28
29bram
30
31http://www.brambring.nl
32
33 $Log: sitemap.php,v $
34 Revision 1.7 2005/12/29 10:26:50 pts00065
35 * support EnablePageListProtect
36 * Added Site to exclude pattern
37
38
39
40*/
# --- Configuration defaults --------------------------------------------------
# SDV()/SDVA() are defined outside this file; presumably they assign a default
# only when the variable has not been set already -- confirm against PmWiki.

SDV($LastModFile, "$WorkDir/.lastmod"); # same as in caching
SDV($SitemapFile, "sitemap.xml.gz");    # needs write access and must be in the root dir:
                                        # make the dir writable or create a (symbolic) link

# It is not useful to have both the action and automatic creation
# (SitemapDelay >= 0) enabled at the same time.
SDV($HandleActions['sitemap'], 'HandleSitemap');
SDV($SitemapDelay, 3600);      # seconds to wait after the last edit; -1 disables automatic generation
SDV($SitemapSquelch, 12*3600); # squelch (in seconds) between two generations of the sitemap

# Authorize bots to read the sitemap through action=sitemap.
$RobotActions['sitemap'] = 1;

# Patterns handed to ListPages() by MakeSitemap(); the built-in entries below
# are always appended, even when the list was pre-set by the administrator.
SDVA($SitemapSearchPatterns, array());
$SitemapSearchPatterns[] = '!\.(All)?Recent(Changes|Uploads|Pages)$!';
$SitemapSearchPatterns[] = '!\.Group(Print)?(Header|Footer|Attributes)$!';
$SitemapSearchPatterns[] = '!^PmWiki\.!';
$SitemapSearchPatterns[] = '!^Site\.!';
$SitemapSearchPatterns[] = '!\.SideBar!';

SDV($SitemapMaxItems, 50000); # maximum items to display, defined by google
SDV($SitemapMaxSize, 10);     # maximum size is 10 Mbytes TODO
SDV($SitemapPing, "http://www.google.com/"); # Use ping with long SitemapDelay (like 24*60*60 ) TODO

# A full timestamp format was tried and disabled because it seemed to break
# in the then-current version of google:
//   SDV($SitemapTimeFmt,'%Y-%m-%dT%H:%M:%sZ');
# NOTE(review): that format uses %s (seconds since the epoch); %S (seconds of
# the minute) was probably intended -- confirm before re-enabling it.
SDV($SitemapTimeFmt, '%Y-%m-%d');

# PHP variable names are case-sensitive: the list that MakeSitemap() fills and
# that $HandleSitemapFmt references is $SitemapItems (lower-case "m"), so that
# is the name that must be initialised to an array here.
SDV($SitemapItems, array());
SDV($SitemapChannelFmt, '<?xml version="1.0" encoding="UTF-8"?>
 <urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
 ');
SDV($SitemapItemFmt, '
 <url>
 <loc>$PageUrl</loc>
 <lastmod>$SitemapItemPubDate</lastmod>
 <changefreq>$SitemapChangeFreq</changefreq>
 <priority>$SitemapPriority</priority>
 </url>');
# Document layout: header, then the item list (by reference, so items added
# later are included), then the closing tag.
SDV($HandleSitemapFmt, array(&$SitemapChannelFmt, &$SitemapItems, '</urlset>'));
77
# Automatic generation: on an ordinary page view, and only when it is enabled
# (SitemapDelay >= 0), rewrite the gzipped sitemap file if
#   - the last-modification marker is older than $SitemapDelay seconds,
#   - the marker is newer than the current sitemap file, and
#   - the current sitemap file is older than $SitemapSquelch seconds.
if ($action == 'browse' && $SitemapDelay >= 0) {
    # @ hides the warning filemtime() raises for a file that does not exist.
    $l = @filemtime($LastModFile);
    $s = @filemtime($SitemapFile);

    if ((($Now - $l) > $SitemapDelay) && ($l > $s) && (($Now - $s) > $SitemapSquelch)) {
        # The file is opened first, so nothing is generated when it is not writable.
        $fp = @fopen($SitemapFile, 'w');
        if ($fp) {
            # MakeSitemap() prints the document; capture it and compress at level 9.
            ob_start();
            MakeSitemap();
            $x = gzencode(ob_get_clean(), 9);
            fwrite($fp, $x);
            fclose($fp);
        }
    }
}
94
95
96
/**
 * Action handler for action=sitemap.
 *
 * Sends an XML content-type header, prints the sitemap produced by
 * MakeSitemap() and terminates the request.
 */
function HandleSitemap()
{
    header('Content-type: text/xml');
    MakeSitemap();
    exit();
}
103
/**
 * Build the sitemap entries and print the complete sitemap document.
 *
 * Page names come from ListPages($SitemapSearchPatterns). Every page that can
 * be read is formatted with $SitemapItemFmt and appended to the global
 * $SitemapItems list; the document described by $HandleSitemapFmt is then
 * printed with PrintFmt(). At most $SitemapMaxItems entries are collected.
 *
 * When $EnablePageListProtect is enabled (the default), pages are fetched with
 * RetrieveAuthPage() at 'read' level and without an auth prompt; otherwise
 * ReadPage() is used directly.
 *
 * The document is written to the output stream; callers that need it as a
 * string wrap the call in output buffering.
 *
 * @return void
 */
function MakeSitemap()
{
    global $SitemapMaxItems, $SitemapTimeFmt, $SitemapItems, $SitemapItemFmt,
        $EnablePageListProtect, $HandleSitemapFmt, $FmtV,
        $SitemapSearchPatterns, $Now;

    # The global list may not have been initialised to an array; count() on a
    # non-array is an error on current PHP versions. Assigning through the
    # global binding keeps any reference held by $HandleSitemapFmt intact.
    if (!is_array($SitemapItems)) {
        $SitemapItems = array();
    }

    # Always defined, so the per-page test below never reads an unset variable.
    $checkAuth = IsEnabled($EnablePageListProtect, 1);

    $pagenames = @ListPages($SitemapSearchPatterns);
    if (!is_array($pagenames)) {
        $pagenames = array();
    }

    # Pages changed within the last six days are reported as changing daily. #TODO
    $daily_weekly = 60 * 60 * 24 * 6;

    foreach ($pagenames as $pn) {
        # Stop as soon as the limit is reached: ">=" keeps the list at exactly
        # $SitemapMaxItems entries, and breaking avoids reading pages that
        # would be discarded anyway.
        if (count($SitemapItems) >= $SitemapMaxItems) {
            break;
        }

        $page = $checkAuth
            ? RetrieveAuthPage($pn, 'read', false, READPAGE_CURRENT)
            : ReadPage($pn, READPAGE_CURRENT);
        if (!$page) {
            continue;
        }

        $FmtV['$SitemapChangeFreq'] = ($Now - $page['time'] < $daily_weekly) ? 'daily' : 'weekly'; #TODO
        $FmtV['$SitemapPriority'] = '0.5'; #TODO
        # $SitemapTimeFmt is a strftime-style format, hence gmstrftime().
        # NOTE(review): gmstrftime() is deprecated as of PHP 8.1; replacing it
        # requires changing the format convention of $SitemapTimeFmt.
        $FmtV['$SitemapItemPubDate'] = gmstrftime($SitemapTimeFmt, $page['time']);
        $SitemapItems[] = FmtPageName($SitemapItemFmt, $page['name']);
    }

    PrintFmt('', $HandleSitemapFmt);
}
133
134