linkpage5.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. <?php
  2. /*
  3. Template Name: Blogroll 5 High Performance
  4. */
  5. ?>
  6. <?php
  // Cache lifetimes in seconds, read by the functions below through $GLOBALS.
  // retrieve_list(): how long the stored link list stays valid.
  $GLOBALS['blogs_i_read_min_time'] = 15 * 60;
  // update_*() scrapers: base age after which a blog's page is fetched again.
  $GLOBALS['external_service_min_time'] = 30 * 60;
  // update_*() scrapers: subtracted from "now" when stamping a newly seen post.
  $GLOBALS['external_service_pre_aged'] = 10 * 60;
  10. // retrieve_list();
  /**
   * Resolve a URL to the redirect target reported by doCurlRequest().
   *
   * Depends on doCurlRequest() and _log(), neither of which is defined in
   * this file.
   *
   * @param string $url URL to resolve.
   * @return mixed The 'redirect_url' element of the doCurlRequest() result, or
   *               $url itself when an Exception is thrown along the way.
   */
  function resolveUrl($url) {
      try {
          $response = doCurlRequest($url);
          $target = $response['redirect_url'];
      } catch (Exception $failure) {
          // Best effort: log the problem and hand the input back unchanged.
          _log("error freeing url $url: " . $failure->getMessage());
          $target = $url;
      }
      return $target;
  }
  /**
   * Download a URL with cURL and return the response body.
   *
   * Sends a fixed referer and browser user agent, follows up to 10 redirects
   * and gives up after 10 seconds.
   *
   * @param string $Url Address to fetch.
   * @return string|bool Whatever curl_exec() returns: the body on success,
   *                     false on failure.
   */
  function curlDownload($Url){
      $handle = curl_init();
      curl_setopt_array($handle, [
          CURLOPT_URL            => $Url,
          CURLOPT_REFERER        => "https://markus-spring.info",
          CURLOPT_USERAGENT      => "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0",
          CURLOPT_HEADER         => 0,    // body only, no response headers
          CURLOPT_RETURNTRANSFER => true, // return the body instead of printing it
          CURLOPT_TIMEOUT        => 10,
          CURLOPT_FOLLOWLOCATION => true,
          CURLOPT_MAXREDIRS      => 10,
      ]);
      $body = curl_exec($handle);
      curl_close($handle);
      return $body;
  }
  /**
   * Cut the blog-list section out of a downloaded page.
   *
   * Collects every line from the one that is exactly
   * "<ul id='BlogList1_blogs'>" up to, but not including, the next line that
   * is exactly "</ul>". Lines are compared verbatim, without trimming.
   *
   * @param string $html Page source with "\n" line endings.
   * @return string The collected lines, each terminated by "\n"; '' when the
   *                opening line never occurs.
   */
  function reduce_lines( $html ) {
      $inside = false;
      $collected = [];
      foreach (explode("\n", $html) as $line) {
          if ($line === "<ul id='BlogList1_blogs'>") {
              $inside = true;
          }
          if (!$inside) {
              continue;
          }
          if ($line === "</ul>") {
              break;
          }
          $collected[] = $line . "\n";
      }
      return implode('', $collected);
  }
  /**
   * Convert a German relative-time phrase ("vor 3 Tagen") into a Unix timestamp.
   *
   * The unit is recognised by its leading letters, so singular and plural
   * forms both match. A month counts as 30 days and a year as 365 days.
   *
   * @param string $chunk Text containing "vor <number> <unit>".
   * @return int 0 when no such phrase is found; otherwise the current time
   *             minus the stated age. A phrase with an unrecognised unit is
   *             treated as 20 years old.
   */
  function make_timestamp($chunk) {
      if (!preg_match('/vor\s(\d+)\s(\D+)/m', $chunk, $parts)) {
          return 0;
      }

      // Unit prefix => length of one unit in seconds.
      $secondsPerUnit = [
          'Minute' => 60,
          'Stunde' => 3600,
          'Tag'    => 24 * 3600,
          'Woche'  => 7 * 24 * 3600,
          'Monat'  => 30 * 24 * 3600,
          'Jahr'   => 365 * 24 * 3600,
      ];
      foreach ($secondsPerUnit as $prefix => $seconds) {
          if (strpos($parts[2], $prefix) === 0) {
              return time() - $parts[1] * $seconds;
          }
      }

      // Unknown unit: push the entry far into the past.
      return time() - 20 * 365 * 24 * 3600;
  }
  /**
   * Turn one scraped blog-list match into a link-list entry.
   *
   * When the item-title markup ($match[3]) contains a link, the latest post
   * and its age come straight from the match. Otherwise the domain decides:
   * three domains have dedicated scrapers (update_lfi(),
   * update_jims_ramblings(), update_oren_grad()); every other domain keeps the
   * timestamp already stored in $list and gets empty latest-post fields.
   *
   * The four cases are mutually exclusive. Previously the final fallback was
   * attached only to the orengrad test and therefore overwrote the timestamp
   * just returned by the lfi / jims-ramblings scrapers.
   *
   * @param array $match        Regex match: [1] blog URL, [2] blog title,
   *                            [3] item-title markup, [4] item-time text.
   * @param array $list         Current link list (entries shaped like the
   *                            return value).
   * @param int   $offset       Added to the timestamp parsed from $match[4].
   * @param mixed $housekeeping Copied verbatim into the entry.
   * @return array Entry with keys domain, url, title, lasturl, lasttitle,
   *               timestamp, housekeeping.
   */
  function rework_entry ($match, $list, $offset, $housekeeping) {
      $url = $match[1];
      // Strip the scheme and everything from the first path slash onwards.
      $domain = preg_replace('/(https?:\/\/|\/<?.*?$)/m', "", $url);
      $title = $match[2];
      $lasturl = '';
      $lasttitle = '';

      if (preg_match('`href=\'(.*?)\'.*?>\s*(.*?)\s*</a>`m', $match[3], $m)) {
          $lasturl = $m[1];
          $lasttitle = $m[2];
          $timestamp = make_timestamp($match[4]) + $offset;
      } elseif (strpos($domain, 'lfi-online.de') === 0) {
          list($timestamp, $lasturl, $lasttitle) = update_lfi($url, $list);
      } elseif (strpos($domain, 'jims-ramblings.blogspot.com') === 0) {
          list($timestamp, $lasturl, $lasttitle) = update_jims_ramblings($url, $list);
      } elseif (strpos($domain, 'www.orengrad.com') === 0) {
          list($timestamp, $lasturl, $lasttitle) = update_oren_grad($url, $domain, $list);
      } else {
          // No post link and no dedicated scraper: keep the stored timestamp.
          // Scan $list directly so sparse keys cannot select the wrong entry.
          $timestamp = null;
          foreach ($list as $entry) {
              if (isset($entry['domain']) && $entry['domain'] === $domain) {
                  $timestamp = isset($entry['timestamp']) ? $entry['timestamp'] : null;
                  break;
              }
          }
          if ($timestamp === null) {
              // Blog not stored yet: fall back to the scraped time indicator.
              $timestamp = make_timestamp($match[4]) + $offset;
          }
      }

      return ['domain' => $domain, 'url' => $url, 'title' => $title, 'lasturl' => $lasturl,
              'lasttitle' => $lasttitle, 'timestamp' => $timestamp, 'housekeeping' => $housekeeping];
  }
  /**
   * Print a debug message wrapped in a <div>, but only when enabled.
   *
   * @param mixed $flag Output happens only when this loosely equals 1.
   * @param mixed $args Message to print; it is not escaped.
   * @return void
   */
  function debug_echo ( $flag, $args ) {
      if ($flag != 1) {
          return;
      }
      echo "<div>", $args, "</div>\n";
  }
  /**
   * Work out the latest picture link and its timestamp for orengrad.com.
   *
   * The blog's page is scraped for the first "pictures/..." image after the
   * "bodycontent" marker. A blog not yet in $list is scraped immediately and
   * stamped "now minus external_service_pre_aged". A stored blog is scraped
   * again only once its timestamp is older than external_service_min_time;
   * the timestamp is reset only when the picture link actually changed.
   *
   * A failed download or a page that no longer matches leaves the stored
   * values untouched. Previously such a failure produced a bogus "$url/" link
   * and marked the blog as freshly updated.
   *
   * @param string $url    Blog URL; also the lookup key in $list.
   * @param string $domain Used in debug messages only.
   * @param array  $list   Current link list (entries with url, lasturl,
   *                       timestamp).
   * @return array [timestamp, lasturl, lasttitle]; lasttitle is always ''.
   */
  function update_oren_grad($url, $domain, $list) {
      $debug = 0;
      // NOTE(review): "(pictures\/.*)" is greedy and, with the s flag, runs to
      // the last quote+alt on the page - confirm this is intended.
      $pattern = '/bodycontent[\'"]>\s*<p><img src=[\'"](pictures\/.*)[\'"] alt=[\'"]/ms';
      $now = time();
      $pre_aged = $now - $GLOBALS['external_service_pre_aged'];

      // Scan $list itself: positions from array_column() do not match the
      // keys of a list that has gaps.
      $stored = null;
      foreach ($list as $entry) {
          if (isset($entry['url']) && $entry['url'] === $url) {
              $stored = $entry;
              break;
          }
      }

      $lasttitle = '';
      if ($stored === null) { // not found in list
          debug_echo($debug, "OG: $domain not found in list");
          $timestamp = $pre_aged;
          $lasturl = $url; // fallback when the page cannot be parsed
          if (preg_match($pattern, (string) curlDownload($url), $m)) {
              $lasturl = $url . '/' . $m[1];
          }
      } else { // found
          debug_echo($debug, "OG: $domain found in list");
          $timestamp = isset($stored['timestamp']) ? $stored['timestamp'] : 0;
          $lasturl = isset($stored['lasturl']) ? $stored['lasturl'] : '';
      }

      debug_echo($debug, "OG: lasturl age " . ($now - $timestamp) . "s");
      if (($now - $timestamp) > $GLOBALS['external_service_min_time']) {
          debug_echo($debug, "OG: $domain found in list but outdated");
          if (!preg_match($pattern, (string) curlDownload($url), $m)) {
              debug_echo($debug, "OG: download or parse failed, keeping stored values");
          } elseif ($url . '/' . $m[1] == $lasturl) {
              debug_echo($debug, "OG: lasturl unchanged, timestamp unchanged: " . ($now - $timestamp) . "s");
          } else {
              debug_echo($debug, "OG: lasturl CHANGED, timestamp reset, previous age: " . ($now - $timestamp) . "s");
              $lasturl = $url . '/' . $m[1];
              $timestamp = $pre_aged;
          }
      }

      debug_echo($debug, "OG: $domain final timestamp: " . ($now - $timestamp) . "s");
      return [$timestamp, $lasturl, $lasttitle];
  }
  /**
   * Work out the latest post and its timestamp for jims-ramblings.blogspot.com.
   *
   * The blog's page is scraped for the first "post-title entry-title"
   * heading link. A blog not yet in $list is scraped immediately and stamped
   * "now minus external_service_pre_aged". A stored blog is scraped again only
   * once its timestamp is older than external_service_min_time + 600 seconds;
   * the timestamp is reset only when the post title changed.
   *
   * A failed download or a page that no longer matches leaves the stored
   * values untouched. Previously such a failure blanked the link and title
   * and marked the blog as freshly updated.
   *
   * @param string $url  Blog URL; also the lookup key in $list.
   * @param array  $list Current link list (entries with url, lasturl,
   *                     lasttitle, timestamp).
   * @return array [timestamp, lasturl, lasttitle]
   */
  function update_jims_ramblings($url, $list) {
      $pattern = '/<h3 class=\'post-title entry-title\'>\s*?<a href=\'(.*?)\'>(.*?)<\/a/';
      $now = time();
      $pre_aged = $now - $GLOBALS['external_service_pre_aged'];

      // Scan $list itself: positions from array_column() do not match the
      // keys of a list that has gaps.
      $stored = null;
      foreach ($list as $entry) {
          if (isset($entry['url']) && $entry['url'] === $url) {
              $stored = $entry;
              break;
          }
      }

      if ($stored === null) { // not found in list
          $timestamp = $pre_aged;
          $lasturl = '';
          $lasttitle = '';
          if (preg_match($pattern, (string) curlDownload($url), $m)) {
              $lasturl = $m[1];
              $lasttitle = $m[2];
          }
      } else { // found
          $timestamp = isset($stored['timestamp']) ? $stored['timestamp'] : 0;
          $lasturl = isset($stored['lasturl']) ? $stored['lasturl'] : '';
          $lasttitle = isset($stored['lasttitle']) ? $stored['lasttitle'] : '';
      }

      if (($now - $timestamp) > $GLOBALS['external_service_min_time'] + 600) {
          // Stored data is stale: look at the page again.
          if (preg_match($pattern, (string) curlDownload($url), $m)) {
              $title_changed = ($m[2] != $lasttitle);
              $lasturl = $m[1];
              $lasttitle = $m[2];
              if ($title_changed) {
                  $timestamp = $pre_aged;
              }
          }
      }

      return [$timestamp, $lasturl, $lasttitle];
  }
  /**
   * Work out the latest post and its timestamp for lfi-online.de.
   *
   * The blog's page is scraped for the first "location = '...'" target that
   * is followed by an <h1 class="typo-N"> heading. A blog not yet in $list is
   * scraped immediately and stamped "now minus external_service_pre_aged". A
   * stored blog is scraped again only once its timestamp is older than
   * external_service_min_time + 600 seconds; the timestamp is reset only when
   * the heading changed.
   *
   * Two defects are fixed here. For a blog not yet in $list the scraped link
   * and title were never assigned, so undefined variables were returned. A
   * failed download or non-matching page blanked the stored values and marked
   * the blog as freshly updated; it now leaves them untouched.
   *
   * @param string $url  Blog URL; also the lookup key in $list.
   * @param array  $list Current link list (entries with url, lasturl,
   *                     lasttitle, timestamp).
   * @return array [timestamp, lasturl, lasttitle]
   */
  function update_lfi($url, $list) {
      $pattern = '/location = \'(.*?)\'">\s*?<h1 class="typo-\d+">(.*?)</msi';
      $now = time();
      $pre_aged = $now - $GLOBALS['external_service_pre_aged'];

      // Scan $list itself: positions from array_column() do not match the
      // keys of a list that has gaps.
      $stored = null;
      foreach ($list as $entry) {
          if (isset($entry['url']) && $entry['url'] === $url) {
              $stored = $entry;
              break;
          }
      }

      if ($stored === null) { // not found in list
          $timestamp = $pre_aged;
          $lasturl = '';
          $lasttitle = '';
          if (preg_match($pattern, (string) curlDownload($url), $m)) {
              $lasturl = $m[1];
              $lasttitle = $m[2];
          }
      } else { // found
          $timestamp = isset($stored['timestamp']) ? $stored['timestamp'] : 0;
          $lasturl = isset($stored['lasturl']) ? $stored['lasturl'] : '';
          $lasttitle = isset($stored['lasttitle']) ? $stored['lasttitle'] : '';
      }

      if (($now - $timestamp) > $GLOBALS['external_service_min_time'] + 600) {
          // Stored data is stale: look at the page again.
          if (preg_match($pattern, (string) curlDownload($url), $m)) {
              $title_changed = ($m[2] != $lasttitle);
              $lasturl = $m[1];
              $lasttitle = $m[2];
              if ($title_changed) {
                  $timestamp = $pre_aged;
              }
          }
      }

      return [$timestamp, $lasturl, $lasttitle];
  }
  /**
   * Refresh the link list from the Blogger blog-list page.
   *
   * Downloads the page, cuts out the blog-list section with reduce_lines(),
   * and turns each blog found there into an entry via rework_entry(). Blogs
   * already in $list get their lasturl, lasttitle, timestamp and housekeeping
   * fields updated; new blogs are appended. The domain www.ruedulavoir.com is
   * always dropped from the result.
   *
   * Fixes over the previous version:
   *  - the blocked domain is removed only when present (a failed search used
   *    to delete entry 0 on every run);
   *  - the stray "<pre>...</pre>" debug output is gone;
   *  - entries are looked up by their real keys and the result is a
   *    consecutively indexed list, so gaps left by a removal cannot make
   *    later lookups hit the wrong entry.
   *
   * @param array $list         Stored link list; may be empty.
   * @param mixed $housekeeping Run counter stamped onto every entry touched.
   * @return array Updated link list with consecutive integer keys.
   */
  function create_linklist ($list, $housekeeping) {
      $debug = 0;
      $hurl = "http://spring2life-links.blogspot.de";
      $blocked_domain = 'www.ruedulavoir.com';
      debug_echo($debug, $hurl);

      // Normalise keys and index the list by domain (first occurrence wins).
      $list = array_values((array) $list);
      $index = array();
      foreach ($list as $key => $entry) {
          if (isset($entry['domain']) && !isset($index[$entry['domain']])) {
              $index[$entry['domain']] = $key;
          }
      }

      $bloglines = reduce_lines(curlDownload($hurl));
      # matches: 1: url 2: blog title 3. post title 4. time indicator
      $re = '/<div class=\'blog-title\'>\s*<a href=\'(.*?)\'.*?>\s*(.*?)<\/a>.*?\'item-title\'>\s*(.*?)\s*<\/span.*?\'item-time\'>\s*(.*?)\s*<\/div>/ms';
      preg_match_all($re, $bloglines, $matches, PREG_SET_ORDER, 0);

      // The position on the page is passed on as the timestamp offset.
      foreach ((array) $matches as $offset => $match) {
          $result = rework_entry($match, $list, $offset, $housekeeping);
          $domain = $result['domain'];
          if (isset($index[$domain])) {
              $id = $index[$domain];
              $list[$id]['lasturl'] = $result['lasturl'];
              $list[$id]['lasttitle'] = $result['lasttitle'];
              $list[$id]['timestamp'] = $result['timestamp'];
              $list[$id]['housekeeping'] = $housekeeping;
          } else {
              $list[] = $result;
              $index[$domain] = count($list) - 1;
          }
      }

      // Drop the blocked blog and return a gap-free list.
      $kept = array();
      foreach ($list as $entry) {
          if (isset($entry['domain']) && $entry['domain'] === $blocked_domain) {
              debug_echo($debug, "dropping $blocked_domain");
              continue;
          }
          $kept[] = $entry;
      }
      return $kept;
  }
  /**
   * Return the rendered blog list, rebuilding the cached data when needed.
   *
   * The link list is cached in the post meta key "_blogs_i_read" of the
   * queried page as ['expires' => ts, 'data' => list, 'housekeeping' => n].
   * When the cache is missing, malformed or expired, the list is rebuilt with
   * create_linklist() and stored again with a lifetime of
   * blogs_i_read_min_time seconds; the housekeeping counter starts at 0 and
   * grows by one per rebuild.
   *
   * While the cache is still valid nothing is downloaded. Previously
   * create_linklist() ran on that path too and its result was thrown away, so
   * every page view paid for the remote fetch.
   *
   * @return string HTML produced by print_list().
   */
  function retrieve_list() {
      $debug = 0;
      $time1 = (float) microtime(true);

      $queried_object = get_queried_object();
      // The queried object may be missing or may not be a post.
      $post_id = ($queried_object && isset($queried_object->ID)) ? $queried_object->ID : 0;
      debug_echo($debug, $post_id);

      $cache_key = '_blogs_i_read';
      $cache = get_post_meta( $post_id, $cache_key, true );
      $usable = is_array($cache) && isset($cache['expires'], $cache['data']) && is_array($cache['data']);

      if ( ! $usable || $cache['expires'] < time() ) {
          debug_echo($debug, 'empty or expired');
          if ( ! $usable ) {
              $list = [];
              $housekeeping = 0;
          } else {
              debug_echo($debug, 'expired');
              $housekeeping = (isset($cache['housekeeping']) ? $cache['housekeeping'] : 0) + 1;
              debug_echo($debug, 'housekeeping:' . $housekeeping);
              $list = $cache['data'];
          }
          $list = create_linklist($list, $housekeeping);
          debug_echo($debug, 'after creating linklist');
          $cache = array(
              'expires' => time() + $GLOBALS['blogs_i_read_min_time'],
              'data' => $list,
              'housekeeping' => $housekeeping );
          update_post_meta( $post_id, $cache_key, $cache );
          debug_echo($debug, 'after updating post meta');
      } else {
          // Cache is fresh: serve it as is, without touching the network.
          debug_echo($debug, 'neither empty nor expired');
      }

      debug_echo($debug, 'before printing');
      return print_list( $cache['data'], $time1 );
  }
  /**
   * Render the link list as HTML list items, newest first, with age headings.
   *
   * Entries are sorted by timestamp (descending) via array_msort(). Whenever
   * an entry falls into an older age section than the one currently open, the
   * running <ul> is closed, a bold heading <li> is emitted and a new <ul> is
   * opened. The caller supplies the outer <ul> elements this relies on.
   *
   * The section is chosen from the entry's own age, so an entry that skips
   * one or more sections gets the heading that matches it. The old
   * if/elseif cascade advanced one heading per entry and could file a
   * year-old entry under "A day and older".
   *
   * URLs and titles come from scraped pages, so they are HTML-escaped;
   * entities that are already encoded are left alone.
   *
   * @param array $list  Link list entries (url, title, lasturl, lasttitle,
   *                     timestamp).
   * @param float $time1 microtime(true) taken when processing started; used
   *                     for the trailing duration comment.
   * @return string HTML fragment.
   */
  function print_list ( $list, $time1 ) {
      $list2 = array_msort($list, array('timestamp' => SORT_DESC));

      // [minimum age in seconds (exclusive), heading], youngest section first.
      $sections = array(
          array(24 * 3600, 'A day and older'),
          array(7 * 24 * 3600, 'Less than a month'),
          array(30 * 24 * 3600, 'Older'),
          array(365 * 24 * 3600, 'From medieval times'),
      );
      $escape = function ($text) {
          // double_encode = false keeps entities such as &amp; from the source page.
          return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
      };
      $field = function ($entry, $key) {
          return isset($entry[$key]) ? $entry[$key] : '';
      };

      $now = time();
      $current = -1; // index of the section whose heading was emitted last
      $s = '';
      foreach ($list2 as $l) {
          $age = $now - $field($l, 'timestamp');

          // Oldest section this entry qualifies for.
          $section = -1;
          foreach ($sections as $i => $def) {
              if ($age > $def[0]) {
                  $section = $i;
              }
          }
          if ($section > $current) {
              if ($section === 0) {
                  $s .= "<!-- " . $age . " // " . 24*3600 . " -->\n";
              }
              $s .= "</ul>\n<li style='font-weight: bold;'>" . $sections[$section][1] . "</li>\n<ul>\n";
              $current = $section;
          }

          $s .= '<li>';
          $s .= '<a target="_blank" href="' . $escape($field($l, 'url')) . '">' . $escape($field($l, 'title'))
              . '</a> // <a target="_blank" href="' . $escape($field($l, 'lasturl')) . '">'
              . $escape($field($l, 'lasttitle')) . "</a></li>\n";
      }

      $timespent = microtime(true) - $time1;
      return $s . "<!-- Duration $timespent sec -->\n";
  }
  310. // function store_list($list) {
  // // adapt for WordPress; for testing, write to a file
  312. // // $ser = serialize($list);
  313. // // $file = fopen('/tmp/file.ser', 'wb');
  314. // // fwrite($file, $ser);
  315. // set_transient( $hurl + "_hp", $list, 600 );
  316. // }
  /**
   * Sort an array of rows by one or more columns, keeping the original keys.
   *
   * Column values are compared case-insensitively (they are lower-cased
   * before sorting); the rows returned are the untouched originals.
   *
   * The sort used to be assembled as PHP source and run through eval(), which
   * broke on column names containing a quote and on an empty $cols. The
   * arguments are now handed to array_multisort() directly.
   *
   * @param array $array Rows to sort; every row must contain each sort column.
   * @param array $cols  Map of column name => sort flag (SORT_ASC, SORT_DESC,
   *                     ...). A flag given as the constant's name is accepted
   *                     too, as the eval-based version allowed.
   * @return array Rows in sorted order under their original keys; an empty
   *               array when $cols is empty.
   */
  function array_msort($array, $cols) {
      // One lower-cased value list per sort column. The '_' prefix makes every
      // key a string, which array_multisort() preserves (it renumbers integers).
      $colarr = array();
      foreach ($cols as $col => $order) {
          $colarr[$col] = array();
          foreach ($array as $k => $row) {
              $colarr[$col]['_' . $k] = strtolower((string) $row[$col]);
          }
      }

      // array_multisort() sorts its array arguments in place, so they must be
      // passed by reference.
      $args = array();
      foreach ($cols as $col => $order) {
          if (is_string($order) && defined($order)) {
              $order = constant($order);
          }
          $args[] = &$colarr[$col];
          $args[] = $order;
      }
      if ($args) {
          call_user_func_array('array_multisort', $args);
      }

      // Rebuild the rows in sorted order from the original data.
      $ret = array();
      foreach ($colarr as $col => $arr) {
          foreach ($arr as $k => $v) {
              $k = substr($k, 1);
              if (!isset($ret[$k])) $ret[$k] = $array[$k];
              $ret[$k][$col] = $array[$k][$col];
          }
      }
      return $ret;
  }
  339. ?>
  340. <?php get_header(); ?>
  <div id="content" class="site-content">
    <div id="primary" class="content-area">
      <main id="main" class="site-main" role="main">
        <article id="post--31201" class="post--31201 post type-post status-publish format-image hentry category-vernacular post_format-post-format-image entry">
          <header class="entry-header">
            <h2 class="entry-title default-max-width">Blogs I read</h2>
          </header><!-- .entry-header -->
          <div id="content" class="site-content">
            <div id="primary" class="content-area">
              <main id="main" class="site-main" role="main">
                <div class="entry-content">
                  <?php
                  // print_list() closes and reopens the inner <ul> around each
                  // age heading, so both list levels below must stay as they are.
                  ?>
                  <ul class="linktable">
                    <li style="font-weight: bold;">Hot from the Sphere</li>
                    <ul>
                      <?php echo retrieve_list(); ?>
                    </ul>
                  </ul>
                </div><!-- .entry-content -->
              </main>
            </div>
          </div>
        </article><!-- #post-31164 -->
      </main><!-- #main -->
    </div><!-- #primary -->
  </div><!-- #content -->
  366. <?php get_footer(); ?>
  367. <?php
  368. // Local variables:
  369. // compile-command: "php linkpage5.php"
  370. // End:
  371. ?>