authorize bots to read sitemap
[wikifarm.git] / install / cookbook / sitemap.php
1 <?php if (!defined('PmWiki')) exit();
2 /*
3 $Id: sitemap.php,v 1.7 2005/12/29 10:26:50 pts00065 Exp $
4 This file is NOT part of PmWiki; still you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published
6 by the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9
10 This script generates a Google sitemap, either automatically, via action=sitemap, or both.
11
12 The automatic version creates a .gz file and is more efficient than the action version.
13
14 google requires that the generated file is located in the root of your pmwiki installation, typically the same place as your pmwiki.php or equivalent file. Thus:
15
16 http://www.brambring.nl/pmwiki.php -> sitemap must be in: http://www.brambring.nl/sitemap.xml.gz
17
18 place the file in the cookbook or local directory
19 and include the file from your farmconfig.php or config.php
20 include_once("$FarmD/cookbook/sitemap.php");
21
22
23 Then add the URL to your sitemap in the google webform or ping google
24
25 automatic google ping is not implemented yet.
26
27 regards
28
29 bram
30
31 http://www.brambring.nl
32
33 $Log: sitemap.php,v $
34 Revision 1.7 2005/12/29 10:26:50 pts00065
35 * support EnablePageListProtect
36 * Added Site to exclude pattern
37
38
39
40 */
# ---------------------------------------------------------------------------
# Configuration defaults. SDV()/SDVA() are PmWiki's "set default value"
# helpers, so a value assigned in config.php before this file is included
# takes precedence over the default given here.
# ---------------------------------------------------------------------------
SDV($LastModFile, "$WorkDir/.lastmod"); # same as in caching
SDV($SitemapFile, "sitemap.xml.gz"); # needs write access and must be in the root dir; make the dir writable or create a (symbolic) link

SDV($HandleActions['sitemap'], 'HandleSitemap'); # it is not useful to have both the action
                                                 # and automatic creation ($SitemapDelay >= 0)
SDV($SitemapDelay, 3600); # seconds to wait after the last edit; set to -1 to disable automatic generation
SDV($SitemapSquelch, 12*3600); # minimum number of seconds between two generations of the sitemap

$RobotActions['sitemap'] = 1;

# Page-name patterns handed to ListPages(); the leading '!' delimiters
# presumably mark them as exclusions (PmWiki convention, verify against ListPages()).
SDVA($SitemapSearchPatterns, array());
$SitemapSearchPatterns[] = '!\.(All)?Recent(Changes|Uploads|Pages)$!';
$SitemapSearchPatterns[] = '!\.Group(Print)?(Header|Footer|Attributes)$!';
$SitemapSearchPatterns[] = '!^PmWiki\.!';
$SitemapSearchPatterns[] = '!^Site\.!';
$SitemapSearchPatterns[] = '!\.SideBar!';

SDV($SitemapMaxItems, 50000); # maximum number of items, as defined by google
SDV($SitemapMaxSize, 10); # maximum size is 10 Mbytes TODO (not enforced yet)
SDV($SitemapPing, "http://www.google.com/"); # Use ping with long SitemapDelay (like 24*60*60 ) TODO (not used yet)

// SDV($SitemapTimeFmt,'%Y-%m-%dT%H:%M:%sZ'); # seems to break in current version of google
// (note: lowercase %s is the Unix timestamp in strftime(); seconds are %S)
SDV($SitemapTimeFmt, '%Y-%m-%d');

# $SitemapItems is the list that $HandleSitemapFmt references and that
# MakeSitemap() fills. It used to be initialised under the misspelled name
# $SiteMapItems only, which left the real variable null.
SDV($SitemapItems, array());
SDV($SiteMapItems, array()); # historical misspelling, kept for configurations that may refer to it
SDV($SitemapChannelFmt, '<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
');
SDV($SitemapItemFmt, '
<url>
<loc>$PageUrl</loc>
<lastmod>$SitemapItemPubDate</lastmod>
<changefreq>$SitemapChangeFreq</changefreq>
<priority>$SitemapPriority</priority>
</url>');
SDV($HandleSitemapFmt, array(&$SitemapChannelFmt, &$SitemapItems, '</urlset>'));

# ---------------------------------------------------------------------------
# Automatic generation: on an ordinary page view, rebuild the compressed
# sitemap file when it has become stale.
# ---------------------------------------------------------------------------
if ($action == 'browse' && $SitemapDelay >= 0) {
    $l = @filemtime($LastModFile); # time of the last edit (false if the file is missing)
    $s = @filemtime($SitemapFile); # time of the last sitemap build (false if the file is missing)

    # Rebuild only when the wiki has been quiet for $SitemapDelay seconds,
    # something changed since the last build, and the last build is at
    # least $SitemapSquelch seconds old.
    if ((($Now - $l) > $SitemapDelay) && ($l > $s) && (($Now - $s) > $SitemapSquelch)) {
        # Open the target first so an unwritable location skips the page scan.
        $fp = @fopen($SitemapFile, "w");
        if ($fp) {
            # MakeSitemap() prints the document; capture it and compress it.
            ob_start();
            MakeSitemap();
            $x = gzencode(ob_get_clean(), 9);
            fwrite($fp, $x);
            fclose($fp);
        }
    }
}
94
95
96
/**
 * Handler for action=sitemap: sends the sitemap as an XML response.
 *
 * Emits the XML content-type header, lets MakeSitemap() print the
 * document and then ends the request so nothing else is appended.
 */
function HandleSitemap()
{
    // The header has to go out before MakeSitemap() prints any output.
    header("Content-type: text/xml");

    MakeSitemap();

    // Terminate here; the sitemap is the complete response.
    exit();
}
103
/**
 * Collect one sitemap entry per listed page and print the whole document.
 *
 * Pages come from ListPages($SitemapSearchPatterns). When
 * $EnablePageListProtect is enabled (the default) each page is fetched with
 * RetrieveAuthPage() using 'read' authorization, otherwise with ReadPage();
 * pages that cannot be retrieved are skipped. Each entry is rendered from
 * $SitemapItemFmt and appended to the global $SitemapItems, and finally
 * $HandleSitemapFmt is printed with PrintFmt().
 *
 * At most $SitemapMaxItems entries are collected.
 *
 * @return void  Output is printed, not returned.
 */
function MakeSitemap()
{
    global $SitemapMaxItems, $SitemapTimeFmt, $SitemapItems, $SitemapItemFmt,
        $EnablePageListProtect, $HandleSitemapFmt, $FmtV,
        $SitemapSearchPatterns, $Now;

    // $SitemapItems can still be null here; count() on a non-array is a
    // TypeError on PHP 8. Assign through the global binding (no unset) so
    // the reference held inside $HandleSitemapFmt keeps pointing at it.
    if (!is_array($SitemapItems)) $SitemapItems = array();

    $checkAuth = (bool) IsEnabled($EnablePageListProtect, 1);
    $daily_weekly = 60 * 60 * 24 * 6; #TODO

    $pagelist = @ListPages($SitemapSearchPatterns);
    if (!is_array($pagelist)) $pagelist = array();

    foreach ($pagelist as $pn) {
        // Stop as soon as the cap is reached: no point reading more pages,
        // and '>=' keeps the list at exactly $SitemapMaxItems entries.
        if (count($SitemapItems) >= $SitemapMaxItems) break;

        $page = $checkAuth
            ? RetrieveAuthPage($pn, 'read', false, READPAGE_CURRENT)
            : ReadPage($pn, READPAGE_CURRENT);
        if (!$page) continue;

        // Pages edited within the last six days are flagged 'daily'.
        $FmtV['$SitemapChangeFreq'] = ($Now - $page['time'] < $daily_weekly) ? 'daily' : 'weekly'; #TODO
        $FmtV['$SitemapPriority'] = '0.5'; #TODO
        // NOTE(review): gmstrftime() is deprecated as of PHP 8.1; it is kept
        // because $SitemapTimeFmt is a strftime-style format string.
        $FmtV['$SitemapItemPubDate'] = gmstrftime($SitemapTimeFmt, $page['time']);
        $SitemapItems[] = FmtPageName($SitemapItemFmt, $page['name']);
    }

    #PrintFmt('', str_replace(array_keys($EntitiesTable), array_values($EntitiesTable), $HandleSitemapFmt));
    PrintFmt('', $HandleSitemapFmt);
}
133
134