1: <?php
2:
3: /**
4: * Feeds modules
5: * @package modules
6: * @subpackage feeds
7: */
8:
9: if (!defined('DEBUG_MODE')) { die(); }
10:
11: /**
12: * Manage a list of feeds
13: * @subpackage feeds/lib
14: */
15: class Hm_Feed_List {
16:
17: use Hm_Server_List;
18:
19: public static function init($user_config, $session) {
20: self::initRepo('feeds', $user_config, $session, self::$server_list);
21: }
22:
23: /*
24: * Connect to an RSS/ATOM feed
25: * @param int $id server id
26: * @param array $server server details
27: * @param string $user username
28: * @param string $pass password
29: * @param array $cache server cache
30: * @return bool
31: */
32: public static function service_connect($id, $server, $user, $pass, $cache=false) {
33: self::$server_list[$id]['object'] = new Hm_Feed();
34: return self::$server_list[$id]['object'];
35: }
36:
37: /**
38: * Get a server cache
39: * @param object $session session object
40: * @param int $id server id
41: * @return bool
42: */
43: public static function get_cache($session, $id) {
44: return false;
45: }
46: }
47:
48: /**
49: * Used to cahce "read" feed item ids
50: * @subpackage feeds/lib
51: */
52: class Hm_Feed_Uid_Cache {
53: use Hm_Uid_Cache;
54: }
55:
56: /**
57: * Connect to and parse RSS/ATOM feeds
58: * @subpackage feeds/lib
59: */
60: class Hm_Feed {
61:
62: var $url;
63: var $id;
64: var $xml_data;
65: var $parsed_data;
66: var $depth;
67: var $type;
68: var $limit;
69: var $heading_block;
70: var $data_block;
71: var $update_cache;
72: var $collect;
73: var $item_count;
74: var $refresh_cache;
75: var $init_cache;
76: var $cache_limit;
77: var $sort;
78: var $status_code;
79: var $feed_type;
80:
81: /**
82: * Setup defaults
83: * @return void
84: */
85: function __construct() {
86: $this->sort = true;
87: $this->limit = 20;
88: $this->cache_limit = 0;
89: $this->status_code = false;
90: $this->url = false;
91: $this->xml_data = false;
92: $this->id = 0;
93: $this->parsed_data = array();
94: $this->depth = 0;
95: $this->feed_type = 'rss';
96: $this->heading_block = false;
97: $this->data_block = false;
98: $this->collect = false;
99: $this->refresh_cache = false;
100: $this->update_cache = false;
101: $this->init_cache = false;
102: $this->item_count = 0;
103: }
104:
105: /**
106: * Get data from a feed url
107: * @param string $url location of the feed
108: * @return string
109: */
110: function get_feed_data($url) {
111: $buffer = '';
112: if (!preg_match("?^http(|s)://?", ltrim($url))) {
113: $url = 'http://'.ltrim($url);
114: }
115: if (function_exists('curl_setopt')) {
116: $type = 'curl';
117: }
118: else {
119: $type = 'file';
120: }
121: switch ($type) {
122: case 'curl':
123: $curl_handle=curl_init();
124: curl_setopt($curl_handle, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36");
125: curl_setopt($curl_handle, CURLOPT_URL, $url);
126: curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT,15);
127: curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER,1);
128: curl_setopt($curl_handle, CURLOPT_FOLLOWLOCATION, true);
129: $buffer = trim(curl_exec($curl_handle));
130: $this->status_code = curl_getinfo($curl_handle, CURLINFO_HTTP_CODE);
131: if ($this->status_code !== false && $this->status_code !== 200) {
132: Hm_Debug::add(sprintf('BAD STATUS CODE %s from url %s', $this->status_code, $url));
133: }
134: curl_close($curl_handle);
135: unset($curl_handle);
136: break;
137: case 'file':
138: $buffer = file_get_contents($url);
139: break;
140: }
141: $this->xml_data = $buffer;
142: return $buffer;
143: }
144:
145: /**
146: * Sort feed items by date
147: * @param array $a first item
148: * @param array $b second item
149: * return int
150: */
151: function sort_by_time($a, $b) {
152: if (isset($a['dc:date']) && isset($b['dc:date'])) {
153: $adate = $a['dc:date'];
154: $bdate = $b['dc:date'];
155: }
156: elseif (isset($a['pubdate']) && isset($b['pubdate'])) {
157: $adate = $a['pubdate'];
158: $bdate = $b['pubdate'];
159: }
160: else {
161: return 0;
162: }
163: $time1 = strtotime($adate);
164: $time2 = strtotime($bdate);
165: if ($time1 == $time2) {
166: return 0;
167: }
168: elseif ($time1 < $time2) {
169: return 1;
170: }
171: else {
172: return -1;
173: }
174: }
175:
176: /**
177: * Sort a list using sort_by_time
178: * @return void
179: */
180: function sort_parsed_data() {
181: $data = $this->parsed_data;
182: usort($data, array($this, 'sort_by_time'));
183: $final_list = array();
184: $i = 1;
185: foreach ($data as $vals) {
186: $final_list[] = $vals;
187: if ($i == $this->limit) {
188: break;
189: }
190: $i++;
191: }
192: $this->parsed_data = $final_list;
193: }
194:
195: /**
196: * Parse feed content
197: * @param string $url feed location
198: * @return bool
199: */
200: function parse_feed($url) {
201: $this->get_feed_data($url);
202: if (!empty($this->parsed_data)) {
203: return true;
204: }
205: if (preg_match('/<feed .+atom/i', $this->xml_data)) {
206: $this->feed_type = 'atom';
207: }
208: $xml_parser = xml_parser_create('UTF-8');
209: xml_set_object($xml_parser, $this);
210: if ($this->feed_type == 'atom' || $this->feed_type == 'rss') {
211: xml_set_element_handler($xml_parser, $this->feed_type.'_start_element', $this->feed_type.'_end_element');
212: xml_set_character_data_handler($xml_parser, $this->feed_type.'_character_data');
213: if (xml_parse($xml_parser, $this->xml_data)) {
214: xml_parser_free($xml_parser);
215: if ($this->sort) {
216: $this->sort_parsed_data();
217: }
218: return true;
219: }
220: else {
221: Hm_Debug::add(sprintf('XML Parse error: %s', xml_error_string(xml_get_error_code($xml_parser))));
222: Hm_Debug::add($this->xml_data);
223: return false;
224: }
225: }
226: else {
227: return false;
228: }
229: }
230:
231: /**
232: * ATOM specific parsing
233: * @param object $parser xml parser
234: * @param string $tagname xml tag name
235: * @param array $attrs tag attributes
236: */
237: function atom_start_element($parser, $tagname, $attrs) {
238: if ($tagname == 'FEED') {
239: $this->heading_block = true;
240: }
241: if ($tagname == 'ENTRY') {
242: $this->heading_block = false;
243: $this->item_count++;
244: $this->data_block = true;
245: }
246: if ($this->data_block) {
247: switch ($tagname) {
248: case 'TITLE':
249: case 'SUMMARY':
250: case 'CONTENT':
251: case 'GUID':
252: case 'UPDATED':
253: case 'MODIFIED':
254: case 'ID':
255: case 'NAME':
256: $this->collect = mb_strtolower($tagname);
257: break;
258: case 'LINK':
259: if (isset($attrs['REL'])) {
260: $rel = $attrs['REL'];
261: }
262: else {
263: $rel = '';
264: }
265: $this->parsed_data[$this->item_count]['link_'.$rel] = $attrs['HREF'];
266: break;
267: }
268: }
269: if ($this->heading_block) {
270: switch ($tagname) {
271: case 'TITLE':
272: case 'UPDATED':
273: case 'LANGUAGE':
274: case 'ID':
275: $this->collect = mb_strtolower($tagname);
276: break;
277: case 'LINK':
278: if (isset($attrs['REL'])) {
279: $rel = $attrs['REL'];
280: }
281: else {
282: $rel = '';
283: }
284: $this->parsed_data[0]['link_'.$rel] = $attrs['HREF'];
285: break;
286: }
287: }
288: $this->depth++;
289: }
290:
291: /**
292: * ATOM end tag check
293: * @param object $parser xml parser
294: * @param string $tagname xml tag
295: */
296: function atom_end_element($parser, $tagname) {
297: $this->collect = false;
298: if ($tagname == 'ENTRY') {
299: $this->data_block = false;
300: }
301: $this->depth--;
302: }
303:
304: /**
305: * Collect atom character data
306: * @param object $parser xml parser
307: * @param string $data xml data
308: */
309: function atom_character_data($parser, $data) {
310: if ($this->heading_block && $this->collect) {
311: $this->parsed_data[0][$this->collect] = trim($data);
312: }
313: if ($this->data_block && $this->collect) {
314: if ($this->collect == 'updated' || $this->collect == 'modified') {
315: $this->collect = 'pubdate';
316: }
317: if (isset($this->parsed_data[$this->item_count][$this->collect])) {
318: $this->parsed_data[$this->item_count][$this->collect] .= trim($data);
319: }
320: else {
321: $this->parsed_data[$this->item_count][$this->collect] = trim($data);
322: }
323: }
324: }
325: /**
326: * Parse an RSS feed element
327: * @param object $parser xml parser
328: * @param string $tagname xml tag name
329: * @param array $attrs tag attributes
330: */
331: function rss_start_element($parser, $tagname, $attrs) {
332: if ($tagname == 'FEED') {
333: $this->heading_block = true;
334: }
335: if ($tagname == 'ITEM') {
336: $this->heading_block = false;
337: $this->item_count++;
338: $this->data_block = true;
339: }
340: if ($this->data_block) {
341: switch ($tagname) {
342: case 'TITLE':
343: case 'LINK':
344: case 'DESCRIPTION':
345: case 'GUID':
346: case 'PUBDATE':
347: case 'DC:DATE':
348: case 'DC:CREATOR':
349: case 'AUTHOR':
350: $this->collect = mb_strtolower($tagname);
351: break;
352: }
353: }
354: if ($this->heading_block) {
355: switch ($tagname) {
356: case 'TITLE':
357: case 'PUBDATE':
358: case 'LANGUAGE':
359: case 'DESCRIPTION':
360: case 'LINK':
361: $this->collect = mb_strtolower($tagname);
362: break;
363:
364: }
365: }
366: $this->depth++;
367: }
368:
369: /**
370: * RSS end tag check
371: * @param object $parser xml parser
372: * @param string $tagname xml tag
373: */
374: function rss_end_element($parser, $tagname) {
375: $this->collect = false;
376: if ($tagname == 'ITEM') {
377: $this->data_block = false;
378: }
379: $this->depth--;
380: }
381:
382: /**
383: * Collect RSS character data
384: * @param object $parser xml parser
385: * @param string $data xml data
386: */
387: function rss_character_data($parser, $data) {
388: if ($this->heading_block && $this->collect) {
389: $this->parsed_data[0][$this->collect] = $data;
390: }
391: if ($this->data_block && $this->collect) {
392: if (isset($this->parsed_data[$this->item_count][$this->collect])) {
393: $this->parsed_data[$this->item_count][$this->collect] .= $data;
394: }
395: else {
396: $this->parsed_data[$this->item_count][$this->collect] = $data;
397: }
398:
399: }
400: }
401: }
402: