/**
 * Textpattern tag: read an XML document, pull the named fields out of each
 * record and render every record through the container / form.
 *
 * The document comes from a URL (http: / https:), from inline XML in the
 * `data` attribute, or from a copy previously cached in the prefs. Optional
 * paging is driven by `limit`, `offset` and the request variable named by
 * `pagevar`.
 *
 * @param  array       $atts  Tag attributes (see the lAtts() list below for names and defaults)
 * @param  string|null $thing Container content; replaced by the form named in `form` when given
 * @return string Rendered records, plus the paging form above and/or below when `pageform` is set
 */
function smd_xml($atts, $thing=NULL) {
	global $pretext, $thispage, $smd_xml_pginfo;

	extract(lAtts(array(
		'data'         => '',
		'record'       => '',
		'fields'       => '',
		'skip'         => '',
		'uppercase'    => '0',
		'convert'      => '', // search:replace, search:replace, ...
		'target_enc'   => 'UTF-8',
		'defaults'     => '',
		'set_empty'    => '0',
		'form'         => '',
		'limit'        => 0,
		'offset'       => 0,
		'linkify'      => '',
		'pageform'     => '',
		'pagevar'      => 'pg',
		'pagepos'      => 'below',
		'wraptag'      => '',
		'break'        => '',
		'class'        => '',
		'delim'        => ',',
		'param_delim'  => ':',
		'concat_delim' => ' ',
		'concat'       => '1',
		'cache_time'   => '0', // in seconds
		'hashsize'     => '6:5',
		'line_length'  => '8192',
		'transport'    => '',
		'debug'        => '0',
	), $atts));

	$src = '';
	$thing = (empty($form)) ? $thing : fetch_form($form);

	if (empty($data)) {
		trigger_error("smd_xml requires a data source");
	}
	if (empty($record)) {
		trigger_error("smd_xml requires a record name within your data stream");
	}
	if (empty($fields)) {
		trigger_error("smd_xml requires a list of fields to extract from within your records");
	}

	// Work out where the paging form (if any) is to be rendered
	$pagebit = $rowinfo = array();
	if ($pageform) {
		$pagePosAllowed = array("below", "above");
		$pageform = fetch_form($pageform);
		$pagepos = str_replace('smd_', '', $pagepos); // For convenience
		$pagepos = do_list($pagepos, $delim);
		foreach ($pagepos as $pageitem) {
			$pagebit[] = (in_array($pageitem, $pagePosAllowed)) ? $pageitem : $pagePosAllowed[0];
		}
	}

	$target_enc = (in_array($target_enc, array('ISO-8859-1', 'US-ASCII', 'UTF-8'))) ? $target_enc : 'UTF-8';

	// Make a unique hash value for this instance so the XML document can be cached in txp_prefs.
	// hashsize is "length:skip"; a missing half falls back to the documented default.
	$uniq = '';
	$md5 = md5($data.$record.$fields);
	$hashParts = explode(':', $hashsize) + array('6', '5');
	$hashLen = (int) $hashParts[0];
	$hashSkip = (int) $hashParts[1];
	for ($idx = 0, $cnt = 0; $cnt < $hashLen; $cnt++, $idx = (($idx+$hashSkip) % strlen($md5))) {
		$uniq .= $md5[$idx];
	}

	$var_lastmod = 'smd_xml_lmod_'.$uniq;
	$var_data = 'smd_xml_data_'.$uniq;
	$lastmod = get_pref($var_lastmod, 0);
	$read_cache = (($cache_time > 0) && ((time() - $lastmod) < $cache_time));
	if (gps('force_read')) {
		$read_cache = false; // Override cache_time
	}
	$crush = function_exists('gzcompress') && function_exists('gzuncompress');
	$pagevar = ($pagevar == 'SMD_XML_UNIQUE_ID') ? $uniq : $pagevar;

	// Cached document is gzipped and then (yuk!) base64'd if zlib is compiled in.
	// Would prefer to store binary data directly but trying to insert it into a txp_prefs
	// text field always gave problems on insertion and/or retrieval
	if ($read_cache) {
		if ($debug > 1) {
			dmp ('++ READING CACHE '.$var_data.' ++');
		}
		$src = $crush ? gzuncompress(base64_decode(get_pref($var_data))) : get_pref($var_data);
		if ($src === false) {
			// Corrupt or missing cache entry: treat as "no document"
			$src = '';
		}
	} elseif ((strpos($data, 'http:') === 0) || (strpos($data, 'https:') === 0)) {
		// The data is to be fetched from a URL
		$src = smd_xml_fetch_url($data, $transport, $line_length);
	} else {
		// Assume data is presented in raw XML
		$src = $data;
	}

	// Store the current document in the cache and datestamp it
	if ($cache_time > 0 && !$read_cache) {
		if ($debug > 1) {
			dmp('++ DATA CACHED to '.$var_data.' ++');
		}
		$srcinfo = $crush ? base64_encode(gzcompress($src)) : doSlash($src);
		set_pref($var_lastmod, time(), 'smd_xml', PREF_HIDDEN, 'text_input');
		set_pref($var_data, $srcinfo, 'smd_xml', PREF_HIDDEN, 'text_input');
	}

	// Make up a replacement array for decoded entities...
	$conversions = array();
	foreach (do_list($convert, $delim) as $pair) {
		if (empty($pair)) continue;
		$pair = do_list($pair, $param_delim);
		// An empty search key is meaningless to strtr() (and made it fail on older PHP)
		if ($pair[0] === '') continue;
		// "search" on its own means "replace with nothing"
		$conversions[$pair[0]] = isset($pair[1]) ? $pair[1] : '';
	}
	if ($debug > 1) {
		echo "++ CONVERSIONS ++";
		dmp($conversions);
	}

	// ... and replace them
	$src = strtr($src, $conversions);
	if ($debug > 2) {
		echo "++ FILTERED SOURCE DATA ++";
		dmp($src);
	}

	// Set up any defaults
	$dflts = array();
	foreach (do_list($defaults, $delim) as $dflt) {
		if ($dflt == '') continue;
		// Split on the first delimiter only so the default value may itself contain it
		$parts = explode($param_delim, $dflt, 2);
		$dflts[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
	}
	$defaults = $dflts;
	if ($debug > 1) {
		echo "++ DEFAULTS ++";
		dmp($defaults);
	}

	if (empty($src)) {
		return '';
	}

	// Paging information
	$limit = (int) $limit;
	$offset = (int) $offset;

	// Count opening record tags, including ones that carry attributes or are self-closing.
	// The parser folds tag names to upper case when 'uppercase' is on, so match case-insensitively then.
	$recPattern = '/<'.preg_quote($record, '/').'(?=[\s>\/])/'.($uppercase ? 'i' : '');
	$rowinfo['numrecs'] = (int) preg_match_all($recPattern, $src, $recMatches);
	$rowinfo['page_rowcnt'] = 0;
	$rowinfo['limit'] = ($limit < $rowinfo['numrecs']) ? $limit : 0;

	if ($offset >= 0) {
		if ($offset < $rowinfo['numrecs']) {
			$rowinfo['offset'] = $offset;
		} else {
			$rowinfo['offset'] = $rowinfo['numrecs'];
			$rowinfo['limit'] = 0;
		}
	} else {
		// Negative offsets count back from the end of the document
		$negoff = $rowinfo['numrecs'] + $offset;
		if ($negoff > 0) {
			$rowinfo['offset'] = $negoff;
		} else {
			$rowinfo['offset'] = 0;
			$rowinfo['limit'] = $rowinfo['numrecs'];
		}
	}

	// Re-assign the atts in case they've been changed by reaching the bounds of the document
	$offset = $rowinfo['offset'];
	$limit = $rowinfo['limit'];

	if ($limit > 0) {
		$keepsafe = $thispage;

		// The page number comes straight from the request: force it to a positive integer
		// so it is safe both for the arithmetic below and for echoing into the page form
		$pg = (int) gps($pagevar);
		if ($pg < 1) {
			$pg = 1;
		}

		$rowinfo['total'] = $rowinfo['numrecs'] - $offset;
		$rowinfo['numPages'] = (int) ceil($rowinfo['total'] / $limit);
		$rowinfo['pg'] = $pg;
		$rowinfo['pgoffset'] = $offset + (($pg - 1) * $limit);
		$rowinfo['prevpg'] = (($pg - 1) > 0) ? $pg - 1 : '';
		$rowinfo['nextpg'] = (($pg + 1) <= $rowinfo['numPages']) ? $pg + 1 : '';
		$rowinfo['pagerows'] = ($pg == $rowinfo['numPages']) ? $rowinfo['total']-($limit * ($rowinfo['numPages']-1)) : $limit;
		$rowinfo['unique_id'] = $uniq;

		// send paging info to txp:newer and txp:older
		$pageout['pg'] = $rowinfo['pg'];
		$pageout['numPages'] = $rowinfo['numPages'];
		$pageout['s'] = isset($pretext['s']) ? $pretext['s'] : '';
		$pageout['c'] = isset($pretext['c']) ? $pretext['c'] : '';
		$pageout['grand_total'] = $rowinfo['numrecs'];
		$pageout['total'] = $rowinfo['total'];
		$thispage = $pageout;
	} else {
		$rowinfo['pgoffset'] = $offset;
	}

	$rowinfo['running_rowcnt'] = $rowinfo['pgoffset']-$offset;
	$rowinfo['first_rec'] = $rowinfo['running_rowcnt'] + 1;
	$rowinfo['last_rec'] = ($limit > 0) ? $rowinfo['first_rec'] + $rowinfo['pagerows'] - 1 : $rowinfo['numrecs'];

	if ($limit > 0) {
		$rowinfo['prev_rows'] = (($rowinfo['prevpg']) ? $limit : 0);
		$rowinfo['next_rows'] = (($rowinfo['nextpg']) ? (($rowinfo['last_rec']+$limit+1) > $rowinfo['total'] ? $rowinfo['total']-$rowinfo['last_rec'] : $limit) : 0);
	}

	if ($debug > 0) {
		echo '++ PAGING INFO ++';
		dmp($rowinfo);
	}

	// Do the dirty XML deed
	$ref = new smd_xml_parser($src, $fields, $record, $skip, $uppercase, $target_enc, $linkify, $defaults, $set_empty, $thing, $delim, $param_delim, $concat, $concat_delim, $rowinfo, $debug);
	$result = $ref->getResults();

	// Create the page form
	$pageblock = '';
	$finalout = $repagements = array();
	if ($rowinfo['limit'] > 0) {
		$repagements['{smd_xml_totalrecs}'] = $rowinfo['total'];
		$repagements['{smd_xml_pagerecs}'] = $rowinfo['pagerows'];
		$repagements['{smd_xml_pages}'] = $rowinfo['numPages'];
		$repagements['{smd_xml_prevpage}'] = $rowinfo['prevpg'];
		$repagements['{smd_xml_thispage}'] = $rowinfo['pg'];
		$repagements['{smd_xml_nextpage}'] = $rowinfo['nextpg'];
		$repagements['{smd_xml_rec_start}'] = $rowinfo['first_rec'];
		$repagements['{smd_xml_rec_end}'] = $rowinfo['last_rec'];
		$repagements['{smd_xml_recs_prev}'] = $rowinfo['prev_rows'];
		$repagements['{smd_xml_recs_next}'] = $rowinfo['next_rows'];
		$repagements['{smd_xml_unique_id}'] = $rowinfo['unique_id'];
		$smd_xml_pginfo = $repagements;
		$pageblock = parse(strtr($pageform, $repagements));
	}

	// Make up the final output
	if (in_array("above", $pagebit)) {
		$finalout[] = $pageblock;
	}
	$finalout[] = doWrap($result, $wraptag, $break, $class);
	if (in_array("below", $pagebit)) {
		$finalout[] = $pageblock;
	}

	// Restore the paging outside the plugin container
	if ($limit > 0) {
		$thispage = $keepsafe;
	}

	return join('', $finalout);
}

/**
 * Fetch the body of an http(s) document (private helper for smd_xml).
 *
 * @param  string $data        URL to fetch
 * @param  string $transport   'fsock', 'curl' or '' to pick the first one available
 * @param  int    $line_length Maximum bytes read per fgets() call (fsock transport only)
 * @return string Response body; '' on any failure, on timeout or for an unknown transport
 */
function smd_xml_fetch_url($data, $transport = '', $line_length = 8192) {
	if (!$transport) {
		if (is_callable('fsockopen')) {
			$transport = 'fsock';
		} elseif (is_callable('curl_init')) {
			$transport = 'curl';
		}
	}

	switch ($transport) {
		case 'curl':
			$c = curl_init();
			curl_setopt($c, CURLOPT_URL, $data);
			curl_setopt($c, CURLOPT_REFERER, hu);
			curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($c, CURLOPT_VERBOSE, false);
			curl_setopt($c, CURLOPT_TIMEOUT, 10);
			$body = curl_exec($c);
			// curl_exec() yields false on failure; callers expect a string
			return ($body === false) ? '' : $body;

		case 'fsock':
			$url = parse_url($data);
			if ($url === false || empty($url['host'])) {
				return '';
			}
			$secure = isset($url['scheme']) && (strtolower($url['scheme']) === 'https');
			// Honour an explicit port in the URL, otherwise use the scheme's default
			$port = isset($url['port']) ? (int) $url['port'] : ($secure ? 443 : 80);
			$path = isset($url['path']) ? $url['path'] : '/';
			if (isset($url['query'])) {
				$path .= '?'.$url['query'];
			}

			$fp = fsockopen(($secure ? 'ssl://' : '').$url['host'], $port, $errno, $errstr, 10);
			if (!$fp) {
				return '';
			}

			$qry = 'GET '.$path;
			$qry .= " HTTP/1.0\r\n";
			$qry .= "Host: ".$url['host'].(isset($url['port']) ? ':'.$port : '')."\r\n";
			$qry .= "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6\r\n\r\n"; // *shrug*
			fputs($fp, $qry);
			stream_set_timeout($fp, 10);

			// fgets() needs a length of at least 2 to make progress
			$chunk = max(2, (int) $line_length);
			$body = '';
			$hdrs = true;
			$timedOut = false;
			while (!feof($fp)) {
				$line = fgets($fp, $chunk);
				// The timeout flag only changes after a read, so sample it every time round
				$info = stream_get_meta_data($fp);
				if ($info['timed_out']) {
					$timedOut = true;
					break;
				}
				if ($line === false) {
					break;
				}
				// A chunk with no newline is a partial line: don't invent a break in the middle of it
				$eol = (substr($line, -1) === "\n");
				$line = rtrim($line, "\r\n");
				if ($hdrs) {
					// The first blank line ends the response headers
					if ($line === '' && $eol) {
						$hdrs = false;
					}
				} else {
					$body .= $line.($eol ? "\n" : '');
				}
			}
			fclose($fp);
			return $timedOut ? '' : $body;

		default:
			return '';
	}
}

/**
 * Event-driven XML reader that turns each record into one parsed copy of the
 * container, with {field} placeholders swapped for the record's values.
 */
class smd_xml_parser {
	private $data;
	private $fields;
	private $rec;
	private $skip;
	private $cf;
	private $outenc;
	private $linkify;
	private $defaults;
	private $set_empty;
	private $intag;
	private $indata;
	private $exists;
	private $skiptag;
	private $xmltag;
	private $xmlatts;
	private $xmldata;
	private $thing;
	private $out;
	private $pdelim;
	private $cdelim;
	private $concat;
	private $rowinfo;
	private $show_record;
	private $debug;

	/**
	 * Store the configuration and parse the document straight away.
	 *
	 * @param string $data      XML document
	 * @param string $fields    Delimited list of tag names to extract
	 * @param string $rec       Tag name that wraps one record
	 * @param string $skip      Delimited list of tag names to ignore
	 * @param mixed  $cf        Case folding flag handed to the XML parser
	 * @param string $outenc    Target encoding handed to the XML parser
	 * @param string $linkify   Delimited list of fields whose URLs become anchors
	 * @param array  $defaults  field => value used when a record lacks that field
	 * @param mixed  $set_empty Truthy: fields with no value and no default become ''
	 * @param string $thing     Template rendered once per record
	 * @param string $delim     Delimiter for the list arguments above
	 * @param string $pdelim    Separator between tag name and attribute name in {tag:attr}
	 * @param mixed  $concat    Truthy: repeated tags within a record are joined
	 * @param string $cdelim    Glue used when joining repeated tags
	 * @param array  $rinfo     Paging/row counters prepared by smd_xml()
	 * @param int    $debug     Debug level
	 */
	public function __construct($data, $fields, $rec, $skip, $cf, $outenc, $linkify, $defaults, $set_empty, $thing, $delim, $pdelim, $concat, $cdelim, $rinfo, $debug=0) {
		$this->data = $data;
		$this->fields = do_list($fields, $delim);
		$this->skip = do_list($skip, $delim);
		$this->rec = $rec;
		$this->cf = $cf; // Case folding
		$this->outenc = $outenc; // Target encoding
		$this->linkify = do_list($linkify, $delim);
		$this->defaults = $defaults;
		$this->set_empty = $set_empty;
		$this->thing = $thing;
		$this->pdelim = $pdelim;
		$this->cdelim = $cdelim;
		$this->concat = $concat;
		$this->rowinfo = $rinfo;
		$this->debug = $debug;

		$this->intag = false;
		$this->indata = false;
		$this->exists = false;
		$this->show_record = false;
		$this->skiptag = '';
		$this->xmltag = '';
		$this->xmlatts = array();
		$this->xmldata = array();
		$this->out = array();

		$this->parse();
	}

	/**
	 * @return array|string List of rendered records, or '' when nothing was produced
	 */
	public function getResults() {
		if ($this->out) {
			return $this->out;
		} else {
			return '';
		}
	}

	/**
	 * Run the document through PHP's XML parser with this object's handlers attached.
	 */
	private function parse() {
		// Kill whitespace between tags so it is not reported as character data
		$squeezed = preg_replace('/>[[:space:]]+</', '><', (string) $this->data);
		if ($squeezed !== null) {
			$this->data = $squeezed;
		}

		$xmlparser = xml_parser_create();
		xml_parser_set_option($xmlparser, XML_OPTION_CASE_FOLDING, (int) $this->cf);
		xml_parser_set_option($xmlparser, XML_OPTION_TARGET_ENCODING, $this->outenc);
		// Bind the handlers as object callables rather than via xml_set_object()
		xml_set_element_handler($xmlparser, array($this, 'smd_xml_start_tag'), array($this, 'smd_xml_end_tag'));
		xml_set_character_data_handler($xmlparser, array($this, 'smd_xml_tag_contents'));
		if (!xml_parse($xmlparser, $this->data, true) && $this->debug > 0) {
			dmp(xml_error_string(xml_get_error_code($xmlparser)));
		}
		xml_parser_free($xmlparser);
	}

	/**
	 * Opening tag: decide if the current record is on the visible page and note the tag being read.
	 */
	private function smd_xml_start_tag($parser, $name, $attribs) {
		$pgval = $this->rowinfo['pgoffset'] - 1;
		$lim = $this->rowinfo['limit'] > 0;
		$rownum = $this->rowinfo['page_rowcnt'];

		// With paging on, only rows inside the current page window are shown;
		// without it, everything from the offset onwards is
		$this->show_record = $lim
			? (($rownum > $pgval) && ($rownum <= $pgval + $this->rowinfo['pagerows']))
			: ($rownum > $pgval);

		if ($name == $this->rec && $this->show_record) {
			$this->intag = true;
		}

		if ($this->intag) {
			if (in_array($name, $this->skip)) {
				$this->xmltag = '';
				$this->xmlatts = array();
				$this->skiptag = $name;
			} else {
				$this->xmltag = $name;
				$this->xmlatts = $attribs;
				// A tag seen before in this record is appended to rather than overwritten
				$this->exists = ($this->concat && isset($this->xmldata['{'.$this->xmltag.'}']));
			}
		}

		$this->indata = false;
	}

	/**
	 * Closing tag: when a record ends, add the counters and defaults, render it and reset.
	 */
	private function smd_xml_end_tag($parser, $name) {
		if ($name == $this->rec && $name != $this->skiptag) {
			$this->intag = false;
			$lim = ($this->rowinfo['limit'] > 0);

			// Append row counter information
			$this->xmldata['{smd_xml_totalrecs}'] = $lim ? $this->rowinfo['total'] : $this->rowinfo['numrecs'] - $this->rowinfo['pgoffset'];
			$this->xmldata['{smd_xml_pagerecs}'] = $lim ? $this->rowinfo['pagerows'] : $this->xmldata['{smd_xml_totalrecs}'];
			$this->xmldata['{smd_xml_pages}'] = $lim ? $this->rowinfo['numPages'] : 1;
			$this->xmldata['{smd_xml_thispage}'] = $lim ? $this->rowinfo['pg'] : 1;
			$this->xmldata['{smd_xml_thisindex}'] = $this->rowinfo['page_rowcnt'] - $this->rowinfo['offset'];
			$this->xmldata['{smd_xml_thisrec}'] = $this->rowinfo['page_rowcnt'] - $this->rowinfo['offset'] + 1;
			$this->xmldata['{smd_xml_runindex}'] = $this->rowinfo['running_rowcnt'];
			$this->xmldata['{smd_xml_runrec}'] = $this->rowinfo['running_rowcnt'] + 1;

			// Fill in fields the record did not supply (only consulted when defaults were given)
			if ($this->defaults) {
				foreach ($this->fields as $field) {
					if (!isset($this->xmldata['{'.$field.'}'])) {
						if (array_key_exists($field, $this->defaults)) {
							$this->xmldata['{'.$field.'}'] = $this->defaults[$field];
						} else if ($this->set_empty) {
							$this->xmldata['{'.$field.'}'] = '';
						}
					}
				}
			}

			if ($this->debug > 0) {
				echo "++ REPLACEMENTS ++";
				dmp($this->xmldata);
			}

			if ($this->show_record) {
				$this->out[] = parse(strtr((string) $this->thing, $this->xmldata));
			}

			// Prepare for next iteration
			$this->rowinfo['running_rowcnt'] = $this->rowinfo['running_rowcnt']+1;
			$this->rowinfo['page_rowcnt'] = $this->rowinfo['page_rowcnt']+1;
			$this->xmldata = array();
			$this->indata = false;
		}

		if ($name == $this->skiptag) {
			$this->skiptag = '';
		}
	}

	/**
	 * Character data: collect the text (and attributes) of wanted fields for the current record.
	 */
	private function smd_xml_tag_contents($parser, $data) {
		if (!$this->intag || $this->skiptag) {
			return;
		}

		if ($this->debug > 1) {
			dmp($this->xmltag . " : ". $data);
		}

		if (!in_array($this->xmltag, $this->fields)) {
			return;
		}

		if (in_array($this->xmltag, $this->linkify)) {
			// Wrap anything URL-shaped in an anchor that points at itself
			$data = preg_replace('@(https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?)@', '<a href="$1">$1</a>', $data);
		}

		$key = '{'.$this->xmltag.'}';
		if ($this->exists) {
			// Repeated tag: glue onto what is already there.
			// NOTE(review): continuation chunks of the same text node also receive the
			// delimiter, as in the original code -- confirm whether that is intended.
			$this->xmldata[$key] .= $this->cdelim.$data;
		} elseif ($this->indata) {
			// The parser may hand over one text node in several pieces
			$this->xmldata[$key] .= $data;
		} else {
			$this->xmldata[$key] = $data;
		}

		// Attributes are exposed as {tag<param_delim>attribute}
		if ($this->xmlatts) {
			foreach ($this->xmlatts as $xkey => $xval) {
				$this->xmldata['{'.$this->xmltag.$this->pdelim.$xkey.'}'] = $xval;
			}
		}

		$this->indata = true;
	}
}

/**
 * Shared body of the prev/next conditional tags (private helper).
 *
 * @param  string      $key   Paging replacement key that must be non-empty for the condition to hold
 * @param  string|null $thing Container content, possibly holding an else branch
 * @return string Parsed branch selected by the condition
 */
function smd_xml_if_page($key, $thing) {
	global $smd_xml_pginfo;

	// No paged smd_xml tag has run yet: nothing to substitute and the condition is false
	$pginfo = is_array($smd_xml_pginfo) ? $smd_xml_pginfo : array();
	$res = isset($pginfo[$key]) && ($pginfo[$key] !== '');

	return parse(EvalElse(strtr((string) $thing, $pginfo), $res));
}

// Convenience functions to check if there's a prev/next page defined. Could also use smd_if
function smd_xml_if_prev($atts, $thing=NULL) {
	return smd_xml_if_page('{smd_xml_prevpage}', $thing);
}

function smd_xml_if_next($atts, $thing=NULL) {
	return smd_xml_if_page('{smd_xml_nextpage}', $thing);
}