/**
 * Fetch a remote document with cURL.
 *
 * @param  string $url Absolute http(s) URL to retrieve
 * @return string      Response body, or '' when the transfer failed
 */
function smd_xml_fetch_via_curl($url) {
	$c = curl_init();
	if ($c === false) {
		return '';
	}
	curl_setopt($c, CURLOPT_URL, $url);
	curl_setopt($c, CURLOPT_REFERER, hu);
	curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($c, CURLOPT_VERBOSE, false);
	curl_setopt($c, CURLOPT_TIMEOUT, 10);
	$body = curl_exec($c);
	// Only resource-based handles (PHP < 8) need closing; objects are freed automatically
	if (is_resource($c)) {
		curl_close($c);
	}
	// curl_exec() returns false on failure: normalise so callers always get a string
	return is_string($body) ? $body : '';
}

/**
 * Fetch a remote document with a raw HTTP/1.0 GET over fsockopen().
 *
 * Response headers are discarded; the body is returned with complete lines
 * terminated by "\n".
 *
 * @param  string $data        Absolute http(s) URL to retrieve
 * @param  int    $line_length Maximum number of bytes to read per fgets() call
 * @return string              Response body, or '' on connection failure / timeout
 */
function smd_xml_fetch_via_fsock($data, $line_length = 8192) {
	$url = parse_url($data);
	if (!is_array($url) || empty($url['host'])) {
		return '';
	}
	if (isset($url['scheme']) && $url['scheme'] == 'https') {
		$prefix = 'ssl://';
		$defaultPort = 443;
	} else {
		$prefix = '';
		$defaultPort = 80;
	}
	// Honour a port given in the URL instead of always forcing the scheme default
	$port = isset($url['port']) ? (int) $url['port'] : $defaultPort;

	$fp = fsockopen($prefix . $url['host'], $port, $errno, $errstr, 10);
	if (!$fp) {
		return '';
	}
	stream_set_timeout($fp, 10);

	// A URL without a path component (http://example.com) must still request "/"
	$qry = 'GET '.(isset($url['path']) ? $url['path'] : '/') . ((isset($url['query'])) ? '?'.$url['query']: '');
	$qry .= " HTTP/1.0\r\n";
	$qry .= "Host: ".$url['host'].(($port != $defaultPort) ? ':'.$port : '')."\r\n";
	$qry .= "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6\r\n\r\n"; // *shrug*
	fputs($fp, $qry);

	$maxlen = max(2, (int) $line_length);
	$body = '';
	$inHeaders = true;
	$atLineStart = true; // false while in the middle of a line longer than $maxlen
	$timedOut = false;
	while (!feof($fp)) {
		$raw = fgets($fp, $maxlen);
		// The timed_out flag is only meaningful when re-read after each socket operation
		$info = stream_get_meta_data($fp);
		if ($info['timed_out']) {
			$timedOut = true;
			break;
		}
		if ($raw === false) {
			break;
		}
		$complete = (substr($raw, -1) === "\n");
		$line = $complete ? rtrim($raw, "\r\n") : $raw;
		if ($inHeaders) {
			// The first truly empty line (CRLF or bare LF) ends the header section
			if ($atLineStart && $line === '') {
				$inHeaders = false;
			}
		} else {
			// Only terminate the line if the source line really ended here
			$body .= $line . ($complete ? "\n" : '');
		}
		$atLineStart = $complete;
	}
	fclose($fp);

	return $timedOut ? '' : $body;
}

/**
 * Tag handler: read an XML document (raw markup or an http/https URL), pull the
 * named fields out of every record and render each record through a form or the
 * tag's container, optionally paged and cached.
 *
 * The document is handed to smd_xml_parser, which substitutes {field}
 * placeholders in the form/container for each record.
 *
 * @param  array       $atts  Tag attributes (see the defaults list below)
 * @param  string|null $thing Container content used as the per-record template when no form is given
 * @return string             Rendered output, or '' when there is no source data
 */
function smd_xml($atts, $thing=NULL) {
	global $pretext, $thispage, $smd_xml_pginfo;

	extract(lAtts(array(
		'data' => '',
		'record' => '',
		'fields' => '',
		'skip' => '',
		'uppercase' => '0',
		'convert' => '', // search:replace, search:replace, ...
		'target_enc' => 'UTF-8',
		'defaults' => '',
		'set_empty' => '0',
		'format' => '',
		'form' => '',
		'limit' => 0,
		'offset' => 0,
		'linkify' => '', // Deprecated: use format="field|link" instead
		'pageform' => '',
		'pagevar' => 'pg',
		'pagepos' => 'below',
		'wraptag' => '',
		'break' => '',
		'class' => '',
		'delim' => ',',
		'param_delim' => '|',
		'concat_delim' => ' ',
		'concat' => '1',
		'cache_time' => '0', // in seconds
		'hashsize' => '6:5',
		'line_length' => '8192',
		'transport' => '',
		'debug' => '0',
	), $atts));

	$src = '';
	$thing = (empty($form)) ? $thing : fetch_form($form);

	if ($linkify) {
		trigger_error("linkify attribute deprecated: use format=\"field|link\" instead", E_USER_NOTICE);
	}
	if (empty($data)) {
		trigger_error("smd_xml requires a data source");
	}
	if (empty($record)) {
		trigger_error("smd_xml requires a record name within your data stream");
	}
	if (empty($fields)) {
		trigger_error("smd_xml requires a list of fields to extract from within your records");
	}

	$pagebit = $rowinfo = array();
	if ($pageform) {
		$pagePosAllowed = array("below", "above");
		$pageform = fetch_form($pageform);
		$pagepos = str_replace('smd_', '', $pagepos); // For convenience
		$pagepos = do_list($pagepos, $delim);
		foreach ($pagepos as $pageitem) {
			$pagebit[] = (in_array($pageitem, $pagePosAllowed)) ? $pageitem : $pagePosAllowed[0];
		}
	}

	$target_enc = (in_array($target_enc, array('ISO-8859-1', 'US-ASCII', 'UTF-8'))) ? $target_enc : 'UTF-8';

	// Make a unique hash value for this instance so the XML document can be cached in txp_prefs.
	// hashsize is "length:skip": take <length> characters of the md5, stepping <skip> each time.
	$uniq = '';
	$md5 = md5($data.$record.$fields);
	$hashParts = explode(':', $hashsize);
	$hashLen = (int) $hashParts[0];
	$hashSkip = isset($hashParts[1]) ? (int) $hashParts[1] : 0;
	for ($idx = 0, $cnt = 0; $cnt < $hashLen; $cnt++, $idx = (($idx+$hashSkip) % strlen($md5))) {
		$uniq .= $md5[$idx];
	}

	$var_lastmod = 'smd_xml_lmod_'.$uniq;
	$var_data = 'smd_xml_data_'.$uniq;
	$lastmod = get_pref($var_lastmod, 0);
	$read_cache = (($cache_time > 0) && ((time() - $lastmod) < $cache_time)) ? true : false;
	$read_cache = gps('force_read') ? false : $read_cache; // Override cache_time
	$crush = function_exists('gzcompress') && function_exists('gzuncompress');
	$pagevar = ($pagevar == 'SMD_XML_UNIQUE_ID') ? $uniq : $pagevar;

	// Cached document is gzipped and then (yuk!) base64'd if zlib is compiled in.
	// Would prefer to store binary data directly but trying to insert it into a txp_prefs
	// text field always gave problems on insertion and/or retrieval
	if ($read_cache) {
		if ($debug > 1) {
			dmp ('++ READING CACHE '.$var_data.' ++');
		}
		$src = $crush ? gzuncompress(base64_decode(get_pref($var_data))) : get_pref($var_data);
		// A corrupt cache entry makes gzuncompress() return false
		if (!is_string($src)) {
			$src = '';
		}
	} else {
		if ((strpos($data, 'http:') === 0) || (strpos($data, 'https:') === 0)) {
			// The data is to be fetched from a URL
			if (!$transport) {
				if (is_callable('fsockopen')) {
					$transport = 'fsock';
				} elseif (is_callable('curl_init')) {
					$transport = 'curl';
				} else {
					$transport = '';
				}
			}
			switch ($transport) {
				case 'curl':
					$src = smd_xml_fetch_via_curl($data);
					break;
				case 'fsock':
					$src = smd_xml_fetch_via_fsock($data, $line_length);
					break;
				default:
					$src = '';
			}
		} else {
			// Assume data is presented in raw XML
			$src = $data;
		}
	}

	// Store the current document in the cache and datestamp it.
	// An empty document (failed fetch) is not cached so the next request can retry.
	if ($cache_time > 0 && !$read_cache && $src !== '') {
		if ($debug > 1) {
			dmp('++ DATA CACHED to '.$var_data.' ++');
		}
		$srcinfo = $crush ? base64_encode(gzcompress($src)) : doSlash($src);
		set_pref($var_lastmod, time(), 'smd_xml', PREF_HIDDEN, 'text_input');
		set_pref($var_data, $srcinfo, 'smd_xml', PREF_HIDDEN, 'text_input');
	}

	// Make up a replacement array for decoded entities...
	$conversions = array();
	$convert = do_list($convert, $delim);
	foreach ($convert as $pair) {
		if (empty($pair)) continue;
		$pair = do_list($pair, $param_delim);
		// An empty search string is meaningless to strtr(); a missing replacement means "remove"
		if ($pair[0] === '') continue;
		$conversions[$pair[0]] = isset($pair[1]) ? $pair[1] : '';
	}
	if ($debug > 1) {
		echo "++ CONVERSIONS ++";
		dmp($conversions);
	}

	// ... and replace them
	$src = strtr($src, $conversions);
	if ($debug > 2) {
		echo "++ FILTERED SOURCE DATA ++";
		dmp($src);
	}

	// Set up any defaults
	$defaults = do_list($defaults, $delim);
	$dflts = array();
	foreach ($defaults as $dflt) {
		if ($dflt == '') continue;
		$parts = explode($param_delim, $dflt);
		$dflts[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
	}
	$defaults = $dflts;

	// Set up any formatting: field|type|arg|arg...
	$format = do_list($format, $delim);
	$formats = array();
	foreach ($format as $frmdef) {
		if ($frmdef == '') continue;
		$parts = explode($param_delim, $frmdef);
		$formats['type'][$parts[0]] = isset($parts[1]) ? $parts[1] : '';
		foreach (array_slice($parts, 2) as $formatArg) {
			$formats['data'][$parts[0]][] = $formatArg;
		}
	}
	if ($debug > 1) {
		if ($defaults) {
			echo "++ DEFAULTS ++";
			dmp($defaults);
		}
		if ($formats) {
			echo "++ FORMATS ++";
			dmp($formats);
		}
	}

	if (empty($src)) {
		return '';
	}

	// Paging information
	$limit = (int) $limit;
	$offset = (int) $offset;
	$rowinfo['numrecs'] = substr_count($src, '<'.$record.'>');
	$rowinfo['page_rowcnt'] = 0;
	$rowinfo['limit'] = ($limit < $rowinfo['numrecs']) ? $limit : 0;
	if ($offset >= 0) {
		if ($offset < $rowinfo['numrecs']) {
			$rowinfo['offset'] = $offset;
		} else {
			$rowinfo['offset'] = $rowinfo['numrecs'];
			$rowinfo['limit'] = 0;
		}
	} else {
		// Negative offsets count back from the end of the document
		$negoff = $rowinfo['numrecs'] + $offset;
		if ($negoff > 0) {
			$rowinfo['offset'] = $negoff;
		} else {
			$rowinfo['offset'] = 0;
			$rowinfo['limit'] = $rowinfo['numrecs'];
		}
	}

	// Re-assign the atts in case they've been changed by reaching the bounds of the document
	$offset = $rowinfo['offset'];
	$limit = $rowinfo['limit'];

	if ($limit > 0) {
		$keepsafe = $thispage;
		$rowinfo['total'] = $rowinfo['numrecs'] - $offset;
		$rowinfo['numPages'] = ceil($rowinfo['total'] / $limit);
		// The page number comes from the request: force it to a positive integer
		$rowinfo['pg'] = max(1, (int) gps($pagevar));
		$rowinfo['pgoffset'] = $offset + (($rowinfo['pg'] - 1) * $limit);
		$rowinfo['prevpg'] = (($rowinfo['pg']-1) > 0) ? $rowinfo['pg']-1 : '';
		$rowinfo['nextpg'] = (($rowinfo['pg']+1) <= $rowinfo['numPages']) ? $rowinfo['pg']+1 : '';
		$rowinfo['pagerows'] = ($rowinfo['pg'] == $rowinfo['numPages']) ? $rowinfo['total']-($limit * ($rowinfo['numPages']-1)) : $limit;
		$rowinfo['unique_id'] = $uniq;

		// send paging info to txp:newer and txp:older
		$pageout['pg'] = $rowinfo['pg'];
		$pageout['numPages'] = $rowinfo['numPages'];
		$pageout['s'] = $pretext['s'];
		$pageout['c'] = $pretext['c'];
		$pageout['grand_total'] = $rowinfo['numrecs'];
		$pageout['total'] = $rowinfo['total'];
		$thispage = $pageout;
	} else {
		$rowinfo['pgoffset'] = $offset;
	}

	$rowinfo['running_rowcnt'] = $rowinfo['pgoffset']-$offset;
	$rowinfo['first_rec'] = $rowinfo['running_rowcnt'] + 1;
	$rowinfo['last_rec'] = ($limit > 0) ? $rowinfo['first_rec'] + $rowinfo['pagerows'] - 1 : $rowinfo['numrecs'];
	if ($limit > 0) {
		$rowinfo['prev_rows'] = (($rowinfo['prevpg']) ? $limit : 0);
		$rowinfo['next_rows'] = (($rowinfo['nextpg']) ? (($rowinfo['last_rec']+$limit+1) > $rowinfo['total'] ? $rowinfo['total']-$rowinfo['last_rec'] : $limit) : 0);
	}
	if ($debug > 0) {
		echo '++ PAGING INFO ++';
		dmp($rowinfo);
	}

	// Do the dirty XML deed
	$ref = new smd_xml_parser($src, $fields, $record, $skip, $uppercase, $target_enc, $linkify, $defaults, $set_empty, $formats, $thing, $delim, $param_delim, $concat, $concat_delim, $rowinfo, $debug);
	$result = $ref->getResults();

	// Create the page form
	$pageblock = '';
	$finalout = $repagements = array();
	if ($rowinfo['limit'] > 0) {
		$repagements['{smd_xml_totalrecs}'] = $rowinfo['total'];
		$repagements['{smd_xml_pagerecs}'] = $rowinfo['pagerows'];
		$repagements['{smd_xml_pages}'] = $rowinfo['numPages'];
		$repagements['{smd_xml_prevpage}'] = $rowinfo['prevpg'];
		$repagements['{smd_xml_thispage}'] = $rowinfo['pg'];
		$repagements['{smd_xml_nextpage}'] = $rowinfo['nextpg'];
		$repagements['{smd_xml_rec_start}'] = $rowinfo['first_rec'];
		$repagements['{smd_xml_rec_end}'] = $rowinfo['last_rec'];
		$repagements['{smd_xml_recs_prev}'] = $rowinfo['prev_rows'];
		$repagements['{smd_xml_recs_next}'] = $rowinfo['next_rows'];
		$repagements['{smd_xml_unique_id}'] = $rowinfo['unique_id'];
		$smd_xml_pginfo = $repagements;
		$pageblock = parse(strtr($pageform, $repagements));
	}

	// Make up the final output
	if (in_array("above", $pagebit)) {
		$finalout[] = $pageblock;
	}
	$finalout[] = doWrap($result, $wraptag, $break, $class);
	if (in_array("below", $pagebit)) {
		$finalout[] = $pageblock;
	}

	// Restore the paging outside the plugin container
	if ($limit > 0) {
		$thispage = $keepsafe;
	}

	return join('', $finalout);
}
/**
 * Event-driven XML reader used by smd_xml().
 *
 * Walks the document with PHP's XML parser, collects the requested fields of
 * each record into a {placeholder} => value map, applies defaults/formatting
 * and renders the record by substituting that map into the template.
 */
class smd_xml_parser {
	private $data;
	private $fields;
	private $subfields = array(); // field => list of extra placeholder names that receive a copy of the field
	private $rec;
	private $skip;
	private $cf;
	private $outenc;
	private $linkify;
	private $defaults;
	private $set_empty;
	private $formats;
	private $intag = false;      // true while inside a record that is to be displayed
	private $indata = false;     // true once character data has been seen for the current tag
	private $exists = false;     // true when the current tag already has a value in this record (concat mode)
	private $skiptag = '';
	private $xmltag = '';
	private $xmlatts = array();
	private $xmldata = array();
	private $thing;
	private $out = array();
	private $pdelim;
	private $cdelim;
	private $concat;
	private $rowinfo;
	private $show_record = false;
	private $debug;

	/**
	 * Store the configuration and immediately parse the document.
	 *
	 * @param string $data      XML document
	 * @param string $fields    Delimited list of tag names to extract; each may carry
	 *                          $pdelim-separated names that receive a copy of the value
	 * @param string $rec       Tag name that marks one record
	 * @param string $skip      Delimited list of tag names whose content is ignored
	 * @param mixed  $cf        Value for XML_OPTION_CASE_FOLDING
	 * @param string $outenc    Value for XML_OPTION_TARGET_ENCODING
	 * @param string $linkify   Delimited list of fields to auto-link (deprecated)
	 * @param array  $defaults  field => default value
	 * @param mixed  $set_empty Truthy to blank out requested fields that have no value
	 * @param array  $formats   'type' => field => format name, 'data' => field => list of args
	 * @param string $thing     Template applied to each record
	 * @param string $delim     List delimiter
	 * @param string $pdelim    Parameter delimiter
	 * @param mixed  $concat    Truthy to join repeated tags within a record
	 * @param string $cdelim    Glue used when joining repeated tags
	 * @param array  $rinfo     Paging/row counters prepared by smd_xml()
	 * @param int    $debug     Debug level
	 */
	public function __construct($data, $fields, $rec, $skip, $cf, $outenc, $linkify, $defaults, $set_empty, $formats, $thing, $delim, $pdelim, $concat, $cdelim, $rinfo, $debug=0) {
		$this->data = $data;
		$this->fields = do_list($fields, $delim);
		$this->subfields = array();
		$this->skip = do_list($skip, $delim);
		$this->rec = $rec;
		$this->cf = $cf; // Case folding
		$this->outenc = $outenc; // Target encoding
		$this->linkify = do_list($linkify, $delim);
		$this->defaults = $defaults;
		$this->set_empty = $set_empty;
		$this->formats = $formats;
		$this->thing = $thing;
		$this->pdelim = $pdelim;
		$this->cdelim = $cdelim;
		$this->concat = $concat;
		$this->rowinfo = $rinfo;
		$this->debug = $debug;
		$this->intag = false;
		$this->indata = false;
		$this->exists = false;
		$this->show_record = false;
		$this->skiptag = '';
		$this->xmltag = '';
		$this->xmlatts = array();
		$this->xmldata = array();
		$this->out = array();

		// Copy any subfields out of the list
		foreach ($this->fields as $key => $fld) {
			$sf = do_list($fld, $pdelim);
			for ($idx = 1; $idx < count($sf); $idx++) {
				$this->subfields[$sf[0]][] = $sf[$idx];
			}
			$this->fields[$key] = $sf[0]; // Make sure the field only holds the zeroth entry
		}

		$this->parse();
	}

	/**
	 * @return array|string List of rendered records, or '' when nothing was produced
	 */
	public function getResults() {
		if ($this->out) {
			return $this->out;
		} else {
			return '';
		}
	}

	/**
	 * Run the whole document through the XML parser, firing the handlers below.
	 */
	private function parse() {
		// Kill whitespace between tags so it is not reported as character data
		$squeezed = preg_replace('/>[[:space:]]+</', '><', (string) $this->data);
		if ($squeezed !== null) {
			$this->data = $squeezed;
		}

		$xmlparser = xml_parser_create();
		xml_set_object($xmlparser, $this);
		xml_parser_set_option($xmlparser, XML_OPTION_CASE_FOLDING, $this->cf);
		xml_parser_set_option($xmlparser, XML_OPTION_TARGET_ENCODING, $this->outenc);
		xml_set_element_handler($xmlparser, "smd_xml_start_tag", "smd_xml_end_tag");
		xml_set_character_data_handler($xmlparser, "smd_xml_tag_contents");
		// The complete document is supplied in one go, so flag it as the final chunk
		xml_parse($xmlparser, $this->data, true);
		xml_parser_free($xmlparser);
	}

	/**
	 * Requested fields followed by their subfield copies, in declaration order.
	 *
	 * @return array
	 */
	private function smd_xml_field_names() {
		$names = array();
		foreach ($this->fields as $field) {
			$names[] = $field;
			if (array_key_exists($field, $this->subfields)) {
				$names = array_merge($names, $this->subfields[$field]);
			}
		}
		return $names;
	}

	/**
	 * Apply one format type to a field value.
	 *
	 * @param  string $value Current field value
	 * @param  string $type  Format name: date, link, escape, sanitize or case
	 * @param  array  $args  Extra arguments given after the type
	 * @return string        Reformatted value (unchanged for unknown types)
	 */
	private function smd_xml_apply_format($value, $type, $args) {
		switch ($type) {
			case 'date':
				$nd = strtotime($value);
				if ($nd !== false && isset($args[0])) {
					$value = strftime($args[0], $nd);
				}
				break;
			case 'link':
				// From http://codesnippets.joyent.com/posts/show/2104
				$pat = "@\b(https?://)?(([0-9a-zA-Z_!~*'().&=+$%-]+:)?[0-9a-zA-Z_!~*'().&=+$%-]+\@)?(([0-9]{1,3}\.){3}[0-9]{1,3}|([0-9a-zA-Z_!~*'()-]+\.)*([0-9a-zA-Z][0-9a-zA-Z-]{0,61})?[0-9a-zA-Z]\.[a-zA-Z]{2,6})(:[0-9]{1,4})?((/[0-9a-zA-Z_!~*'().;?:\@&=+$,%#-]+)*/?)@";
				// Wrap each match in an anchor; substituting the bare match would be a no-op
				$value = preg_replace($pat, '<a href="\0">\0</a>', $value);
				break;
			case 'escape':
				$value = doSlash($value);
				break;
			case 'sanitize':
				$mode = isset($args[0]) ? $args[0] : '';
				if ($mode == "url") {
					$value = sanitizeForUrl($value);
				} else if ($mode == "file") {
					$value = sanitizeForFile($value);
				} else if ($mode == "url_title") {
					$value = stripSpace($value, 1);
				}
				break;
			case 'case':
				// Case operations are applied in the order given
				foreach ($args as $op) {
					if ($op == "upper") {
						$value = strtoupper($value);
					} else if ($op == "lower") {
						$value = strtolower($value);
					} else if ($op == "ucfirst") {
						$value = ucfirst($value);
					} else if ($op == "ucwords") {
						$value = ucwords($value);
					}
				}
				break;
		}
		return $value;
	}

	/**
	 * Element-open handler: decide whether the current record is on the visible
	 * page and remember which tag subsequent character data belongs to.
	 */
	private function smd_xml_start_tag($parser, $name, $attribs) {
		$pgval = $this->rowinfo['pgoffset'] - 1;
		$lim = $this->rowinfo['limit'] > 0;
		$this->show_record = $lim
			? (($this->rowinfo['page_rowcnt'] > $pgval) && ($this->rowinfo['page_rowcnt'] <= $pgval + $this->rowinfo['pagerows']))
			: $this->rowinfo['page_rowcnt'] > $pgval;

		if ($name == $this->rec && $this->show_record) {
			$this->intag = true;
		}

		if ($this->intag) {
			if (in_array($name, $this->skip)) {
				$this->xmltag = '';
				$this->xmlatts = array();
				$this->skiptag = $name;
			} else {
				$this->xmltag = $name;
				$this->xmlatts = $attribs;
				// A repeated tag within one record is concatenated rather than overwritten
				$this->exists = ($this->concat && isset($this->xmldata['{'.$this->xmltag.'}']));
			}
		}
		$this->indata = false;
	}

	/**
	 * Element-close handler: when a record ends, add the counter placeholders,
	 * apply defaults and formats, render the record and advance the row counters.
	 */
	private function smd_xml_end_tag($parser, $name) {
		if ($name == $this->rec && $name != $this->skiptag) {
			$this->intag = false;
			$lim = ($this->rowinfo['limit'] > 0) ? true : false;

			// Append row counter information
			$this->xmldata['{smd_xml_totalrecs}'] = $lim ? $this->rowinfo['total'] : $this->rowinfo['numrecs'] - $this->rowinfo['pgoffset'];
			$this->xmldata['{smd_xml_pagerecs}'] = $lim ? $this->rowinfo['pagerows'] : $this->xmldata['{smd_xml_totalrecs}'];
			$this->xmldata['{smd_xml_pages}'] = $lim ? $this->rowinfo['numPages'] : 1;
			$this->xmldata['{smd_xml_thispage}'] = $lim ? $this->rowinfo['pg'] : 1;
			$this->xmldata['{smd_xml_thisindex}'] = $this->rowinfo['page_rowcnt'] - $this->rowinfo['offset'];
			$this->xmldata['{smd_xml_thisrec}'] = $this->rowinfo['page_rowcnt'] - $this->rowinfo['offset'] + 1;
			$this->xmldata['{smd_xml_runindex}'] = $this->rowinfo['running_rowcnt'];
			$this->xmldata['{smd_xml_runrec}'] = $this->rowinfo['running_rowcnt'] + 1;

			// Set any tag contents to a default value, if specified.
			// set_empty must work on its own, not only when defaults were supplied.
			if ($this->defaults || $this->set_empty) {
				foreach ($this->smd_xml_field_names() as $sfield) {
					if (!isset($this->xmldata['{'.$sfield.'}'])) {
						if ($this->defaults && array_key_exists($sfield, $this->defaults)) {
							$this->xmldata['{'.$sfield.'}'] = $this->defaults[$sfield];
						} else if ($this->set_empty) {
							$this->xmldata['{'.$sfield.'}'] = '';
						}
					}
				}
			}

			// Reformat any fields, if specified
			if ($this->formats && isset($this->formats['type'])) {
				foreach ($this->smd_xml_field_names() as $sfield) {
					$key = '{'.$sfield.'}';
					if (isset($this->xmldata[$key]) && array_key_exists($sfield, $this->formats['type'])) {
						$args = isset($this->formats['data'][$sfield]) ? $this->formats['data'][$sfield] : array();
						$this->xmldata[$key] = $this->smd_xml_apply_format($this->xmldata[$key], $this->formats['type'][$sfield], $args);
					}
				}
			}

			if ($this->debug > 0) {
				echo "++ REPLACEMENTS ++";
				dmp($this->xmldata);
			}

			if ($this->show_record) {
				$this->out[] = parse(strtr($this->thing, $this->xmldata));
			}

			// Prepare for next iteration
			$this->rowinfo['running_rowcnt'] = $this->rowinfo['running_rowcnt']+1;
			$this->rowinfo['page_rowcnt'] = $this->rowinfo['page_rowcnt']+1;
			$this->xmldata = array();
			$this->indata = false;
		}

		if ($name == $this->skiptag) {
			$this->skiptag = '';
		}
	}

	/**
	 * Character-data handler: store (or append to) the value of the current tag
	 * when it is one of the requested fields, plus its subfield copies and
	 * attribute placeholders.
	 */
	private function smd_xml_tag_contents($parser, $data) {
		if (!$this->intag || $this->skiptag) {
			return;
		}
		if ($this->debug > 1) {
			dmp($this->xmltag . " : ". $data);
		}
		if (!in_array($this->xmltag, $this->fields)) {
			return;
		}

		// TO REMOVE
		if (in_array($this->xmltag, $this->linkify)) {
			$data = preg_replace('@(https?://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?)@', '<a href="$1">$1</a>', $data);
		}
		// END: TO REMOVE

		$key = '{'.$this->xmltag.'}';
		if ($this->indata) {
			// The parser may deliver one text node in several pieces: glue them back together
			if ($this->exists) {
				$this->xmldata[$key] .= $this->cdelim.$data;
			} else {
				$this->xmldata[$key] .= $data;
			}
		} else {
			if ($this->exists) {
				$this->xmldata[$key] .= $this->cdelim.$data;
			} else {
				$this->xmldata[$key] = $data;
			}
		}

		// Copy the tag to any duplicate nodes
		if (array_key_exists($this->xmltag, $this->subfields)) {
			foreach ($this->subfields[$this->xmltag] as $copyfield) {
				$this->xmldata['{'.$copyfield.'}'] = $this->xmldata[$key];
			}
		}

		// Create any attribute nodes
		if ($this->xmlatts) {
			foreach ($this->xmlatts as $xkey => $xval) {
				$this->xmldata['{'.$this->xmltag.$this->pdelim.$xkey.'}'] = $xval;
			}
		}
		$this->indata = true;
	}
}
// Convenience functions to check if there's a prev/next page defined. Could also use smd_if
/**
 * Conditional tag: render the container's "true" branch when the most recent
 * paged smd_xml tag reported a previous page, otherwise the else branch.
 * Paging placeholders such as {smd_xml_prevpage} are substituted in the container.
 *
 * @param  array       $atts  Tag attributes (unused)
 * @param  string|null $thing Container content
 * @return string             Parsed output of the selected branch
 */
function smd_xml_if_prev($atts, $thing = NULL) {
	global $smd_xml_pginfo;

	// No paged smd_xml tag may have run yet, in which case there is nothing to substitute
	$pginfo = is_array($smd_xml_pginfo) ? $smd_xml_pginfo : array();
	$res = isset($pginfo['{smd_xml_prevpage}']) && $pginfo['{smd_xml_prevpage}'] != '';

	return parse(EvalElse(strtr((string) $thing, $pginfo), $res));
}
/**
 * Conditional tag: render the container's "true" branch when the most recent
 * paged smd_xml tag reported a next page, otherwise the else branch.
 * Paging placeholders such as {smd_xml_nextpage} are substituted in the container.
 *
 * @param  array       $atts  Tag attributes (unused)
 * @param  string|null $thing Container content
 * @return string             Parsed output of the selected branch
 */
function smd_xml_if_next($atts, $thing = NULL) {
	global $smd_xml_pginfo;

	// No paged smd_xml tag may have run yet, in which case there is nothing to substitute
	$pginfo = is_array($smd_xml_pginfo) ? $smd_xml_pginfo : array();
	$res = isset($pginfo['{smd_xml_nextpage}']) && $pginfo['{smd_xml_nextpage}'] != '';

	return parse(EvalElse(strtr((string) $thing, $pginfo), $res));
}