<?php
/*
Plugin Name: DirectNews XML Scheduled Importer
Plugin URI: http://support.directnews.co.uk
Description: A plugin which imports data from a DirectNews XML feed on a scheduled basis.
Version: 0.2
Author: Adfero Ltd
Author URI: http://www.adfero.co.uk
*/
/* options are deleted in case of plugin deactivation */
require_once(ABSPATH . 'wp-admin/includes/admin.php');
require_once(ABSPATH . 'wp-includes/post.php');
// Registered against this file's own plugin basename, so the callback fires
// regardless of the directory the plugin is installed in.
register_deactivation_hook(__FILE__, 'dnxml_sched_deactivate');
/**
 * Clean up when the plugin is deactivated.
 *
 * Removes the pending import event (if any) and then deletes every option
 * this plugin stores. The event must be removed first: it is keyed by the
 * feed URL, which is read from the "dnxml_sched_url" option.
 */
function dnxml_sched_deactivate() {
    $event_args = array("url" => get_option("dnxml_sched_url"));
    $timestamp  = wp_next_scheduled('dnxml_sched_hook', $event_args);
    if ($timestamp) {
        wp_unschedule_event($timestamp, 'dnxml_sched_hook', $event_args);
    }

    delete_option("dnxml_sched_url");
    delete_option("dnxml_sched_inseconds");
    delete_option("dnxml_sched_recc");
    delete_option("dnxml_sched_triggercount");
}
/* Register the plugin's options page in the admin menu */
add_action('admin_menu', 'dnxml_sched_add_admin_pages');
/**
 * Register the "DirectNews Scheduler" options page.
 *
 * The page is rendered by dnxml_sched_options_page() and uses this file's
 * path as its menu slug.
 */
function dnxml_sched_add_admin_pages() {
    // A capability name is used instead of the numeric user level 10,
    // which WordPress has deprecated.
    add_options_page('DirectNews Scheduler', 'DirectNews Scheduler', 'manage_options', __FILE__, 'dnxml_sched_options_page');
}
/* Options sent by the options form are set here */
/* Schedules are activated and deactivated */
add_action('init', 'dnxml_sched_setoptions');
/**
 * Handle submissions of the scheduler forms shown on the options page.
 *
 * Reads three POST fields:
 *  - dnxml_sched_stop:      unschedule the current import event.
 *  - dnxml_sched_url:       store the feed URL.
 *  - dnxml_sched_inseconds: store the interval and, when no event exists for
 *                           the stored URL, schedule the recurring event and
 *                           run one import immediately.
 */
function dnxml_sched_setoptions() {
    // This runs on 'init' for every request, so anything that is not a POST
    // from an administrator inside wp-admin is ignored.
    if (empty($_POST) || !is_admin() || !current_user_can('manage_options')) {
        return;
    }
    // NOTE(review): there is no nonce check here because the forms do not
    // emit one; adding CSRF protection needs a matching change in the forms.

    if (!empty($_POST['dnxml_sched_stop'])) {
        $event_args = array("url" => get_option("dnxml_sched_url"));
        $timestamp  = wp_next_scheduled('dnxml_sched_hook', $event_args);
        // wp_next_scheduled() returns false when nothing is scheduled.
        if ($timestamp) {
            wp_unschedule_event($timestamp, "dnxml_sched_hook", $event_args);
        }
    }

    if (!empty($_POST['dnxml_sched_url']) && is_string($_POST['dnxml_sched_url'])) {
        $feed_url = esc_url_raw(trim(stripslashes($_POST['dnxml_sched_url'])));
        if ($feed_url !== '') {
            update_option("dnxml_sched_url", $feed_url);
        }
    }

    $seconds = 0;
    if (isset($_POST['dnxml_sched_inseconds']) && is_scalar($_POST['dnxml_sched_inseconds'])) {
        $seconds = (int) $_POST['dnxml_sched_inseconds'];
    }

    if ($seconds > 0) {
        update_option("dnxml_sched_inseconds", $seconds);

        // Re-read the URL so a value saved earlier in this request is used.
        $event_args = array("url" => get_option("dnxml_sched_url"));
        if (!wp_next_scheduled('dnxml_sched_hook', $event_args)) {
            wp_schedule_event(time() + $seconds, "dnxml_sched_recc", "dnxml_sched_hook", $event_args);
            // Run one import straight away rather than waiting for the first interval.
            dnxml_sched_trigger_schedule($event_args["url"]);
        }
    }
}
/* A custom recurrence has to be added to the cron_schedules array */
add_filter('cron_schedules', 'dnxml_sched_more_reccurences');
/**
 * Add the plugin's "dnxml_sched_recc" recurrence to the cron schedules.
 *
 * The interval is taken from the "dnxml_sched_inseconds" option. The
 * recurrence is only added when that option holds a positive number of
 * seconds, so an unset or non-numeric option never yields a zero or
 * non-integer interval.
 *
 * @param array $recc Schedules passed in by the 'cron_schedules' filter.
 * @return array The schedules, with the plugin's recurrence added when valid.
 */
function dnxml_sched_more_reccurences($recc) {
    $interval = (int) get_option("dnxml_sched_inseconds");
    if ($interval > 0) {
        $recc['dnxml_sched_recc'] = array(
            'interval' => $interval,
            'display'  => 'DirectNews XML Import Schedule',
        );
    }
    return $recc;
}
/* This is the scheduling hook for our plugin that is triggered by cron */
add_action('dnxml_sched_hook', 'dnxml_sched_trigger_schedule');
/**
 * Callback for the "dnxml_sched_hook" event: import the feed and count the run.
 *
 * @param string $url Feed address handed to dnxml_sched_load_articles().
 */
function dnxml_sched_trigger_schedule($url) {
    dnxml_sched_load_articles($url);

    // Bump the run counter stored in the "dnxml_sched_triggercount" option.
    $runs_so_far = get_option("dnxml_sched_triggercount");
    update_option("dnxml_sched_triggercount", $runs_so_far + 1);
}
/* The options page display */
/**
 * Render the plugin's options page.
 *
 * When an import event is scheduled for the stored feed URL, the page shows
 * the current time, the trigger time of each cron entry for
 * "dnxml_sched_hook", a refresh link, a button to stop the schedule and the
 * number of runs so far. Otherwise it shows a form for entering the feed URL
 * and the interval in seconds. Both forms post back to the current page and
 * are processed by dnxml_sched_setoptions().
 */
function dnxml_sched_options_page() {
    $feed_url     = get_option("dnxml_sched_url");
    $is_scheduled = wp_next_scheduled('dnxml_sched_hook', array("url" => $feed_url));
    // The current page address, escaped once; used for both form actions and the refresh link.
    $page_url     = esc_url($_SERVER["REQUEST_URI"]);
?>
<div class="wrap">
<h2>DirectNews XML Scheduled Importer</h2>
<p></p>
<div style="padding: 10px; border: 1px solid #cccccc;">
<?php
    if ($is_scheduled) {
?>
<p><b>DirectNews XML Importer is scheduled!</b></p>
<pre><?php
        $crons = _get_cron_array();
        if (is_array($crons)) {
            foreach ($crons as $timestamp => $cron) {
                if (isset($cron['dnxml_sched_hook'])) {
                    echo 'Time now:'." \t\t\t".date(get_option('date_format'))." ".date("H:i:s")."<br />";
                    echo 'Schedule will be triggered:'." \t".date(get_option('date_format'), $timestamp)." ".date("H:i:s", $timestamp)."<br />";
                }
            }
        }
?><a href="<?php echo $page_url; ?>">refresh</a><br />
</pre>
<form method="post" action="<?php echo $page_url; ?>">
<input type="submit" name="dnxml_sched_stop" id="dnxml_sched_stop" value="To turn off importer schedules" />
</form>
<?php
        $trigger_count = (int) get_option("dnxml_sched_triggercount");
        if ($trigger_count > 0) {
?>
<p>DirectNews XML Schedule was triggered
<?php echo $trigger_count; ?> times.</p>
<?php
        }
    } else {
?>
<p>DirectNews XML Importer is NOT scheduled!</p>
<?php
    }
?>
</div>
<?php
    if (!$is_scheduled) {
?>
<br />
<form style="padding: 10px; border: 1px solid #cccccc;" method="post" action="<?php echo $page_url; ?>">
<p>Set up a new import schedule</p><br />
DirectNews XML URL <input type="text" name="dnxml_sched_url" value="<?php echo esc_attr($feed_url); ?>" /><br />
Seconds from now until this schedule should be triggered:<br />
<input type="text" name="dnxml_sched_inseconds" value="<?php echo esc_attr(get_option("dnxml_sched_inseconds")); ?>" />seconds<br />
<input type="submit" name="dnxml_sched_submit" id="dnxml_sched_submit" value="Set Import Schedule" />
</form>
<?php
    }
?>
</div>
<?php
}
/**
 * Return the text of the first node in a DOM node list.
 *
 * @param DOMNodeList|false|null $node_list Result of getElementsByTagName() or an XPath query.
 * @return string The first node's nodeValue, or '' when the list is empty or not a list.
 */
function _dnxml_sched_first_node_value($node_list) {
    if (!$node_list || $node_list->length < 1) {
        return '';
    }
    return $node_list->item(0)->nodeValue;
}

/**
 * Download a DirectNews XML feed and create a published post for every
 * article that does not exist yet.
 *
 * For each <Article> element the function reads the Date and Created values
 * (publication time), Heading (title), Contents (body), the optional
 * Picture/Large/URL and Picture/PhotoTag, and the Categories/Category names.
 * Articles whose date cannot be parsed are skipped. An article is treated as
 * existing when post_exists() matches its title, content and date.
 *
 * @param string $url Address of the feed, fetched with getFile().
 * @return WP_Error|null A WP_Error if wp_insert_post() returned one (the import
 *                       stops at that article); otherwise nothing.
 * @throws Exception Propagated from getFile() when the feed cannot be downloaded.
 */
function dnxml_sched_load_articles($url) {
    global $wpdb;

    // The function no longer exists on PHP 7+, so it is only called where available.
    if (function_exists('set_magic_quotes_runtime')) {
        set_magic_quotes_runtime(0);
    }

    // The feed is fetched with cURL via getFile(); $doc->load($url) is the
    // alternative when URL fopen wrappers are enabled.
    $feedData = getFile($url, 3600);
    if (!is_string($feedData) || trim($feedData) === '') {
        return;
    }

    $doc = new DOMDocument();
    if (!$doc->loadXML($feedData)) {
        // Not well-formed XML: nothing to import.
        return;
    }

    $xpath    = new DOMXPath($doc);
    $nodes    = $xpath->evaluate("//Article", $doc);
    $articles = array();

    // Pass 1: turn every <Article> node into an array for wp_insert_post().
    foreach ($nodes as $node) {
        $date               = _dnxml_sched_first_node_value($node->getElementsByTagName('Date'));
        $time               = $node->getAttribute('Created');
        $post_title         = _dnxml_sched_first_node_value($node->getElementsByTagName('Heading'));
        $post_content       = _dnxml_sched_first_node_value($node->getElementsByTagName('Contents'));
        $post_image         = _dnxml_sched_first_node_value($xpath->evaluate('Picture/Large/URL', $node));
        $post_image_caption = _dnxml_sched_first_node_value($xpath->evaluate('Picture/PhotoTag', $node));

        $categories = array();
        foreach ($xpath->evaluate('Categories/Category', $node) as $category) {
            // NOTE(review): $wpdb->escape() is kept from the original; newer
            // WordPress marks it deprecated - confirm the intended replacement.
            $categories[] = $wpdb->escape($category->nodeValue);
        }

        // Prepend the picture, if the article has one.
        // NOTE(review): 'width"280"' lacks an '=' and looks like a typo, but the
        // string is left untouched because post_exists() below compares the
        // generated content with posts imported earlier.
        if ($post_image != "") {
            $post_content = '[caption id="" width"280" align="right" caption= "'.$post_image_caption.'"]<img src="'.$post_image.'" alt="'.$post_image_caption.'" width="280">[/caption]'.$post_content;
        }

        // The feed date is split on '/' as day/month/year; combine it with the Created time.
        $date_parts = explode('/', $date);
        if (count($date_parts) < 3) {
            continue;
        }
        list($day, $month, $year) = $date_parts;
        $timestamp = strtotime($year.'-'.$month.'-'.$day.' '.$time);
        if ($timestamp === false) {
            continue;
        }
        $post_date_gmt = gmdate('Y-m-d H:i:s', $timestamp);
        $post_date     = get_date_from_gmt($post_date_gmt);

        // Lower-case upper-case tag names and self-close <br> / <hr>.
        $post_content = preg_replace_callback(
            '|<(/?[A-Z]+)|',
            function ($match) {
                return '<' . strtolower($match[1]);
            },
            $post_content
        );
        $post_content = str_replace('<br>', '<br />', $post_content);
        $post_content = str_replace('<hr>', '<hr />', $post_content);

        $articles[] = array(
            'post_author'   => 1,
            'post_date'     => $post_date,
            'post_date_gmt' => $post_date_gmt,
            'post_content'  => $post_content,
            'post_title'    => $post_title,
            'post_status'   => 'publish',
            'categories'    => $categories,
        );
    }

    // Pass 2: insert the articles that are not in WordPress yet.
    foreach ($articles as $article) {
        if (post_exists($article['post_title'], $article['post_content'], $article['post_date'])) {
            // TODO: update story
            continue;
        }

        $post_id = wp_insert_post($article);
        if (is_wp_error($post_id)) {
            return $post_id;
        }
        if (!$post_id) {
            return;
        }

        if (count($article['categories']) > 0) {
            $category_ids = array();
            foreach ($article['categories'] as $category_name) {
                // Categories are created under parent ID 5.
                // NOTE(review): presumably a site-specific category - confirm.
                $category_ids[] = wp_create_category($category_name, 5);
            }
            wp_set_post_categories($post_id, $category_ids);
        }
    }
}
/**
 * Fetch a remote file with the cURL extension.
 *
 * @param string $url     Address to download.
 * @param int    $timeout Value passed to CURLOPT_CONNECTTIMEOUT.
 * @return string The response body.
 * @throws Exception When the transfer fails or the HTTP status is not 200.
 */
function getFile($url,$timeout) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

    $file_contents = curl_exec($ch);
    $status        = curl_getinfo($ch, CURLINFO_HTTP_CODE);

    // curl_exec() returns false on failure; that must not be handed back as a body.
    if ($file_contents === false || $status !== 200) {
        $message = 'Return Status: '.$status.', please try again after a while, could not load URL :'.$url;
        $curl_error = curl_error($ch);
        if ($curl_error !== '') {
            $message .= ' ('.$curl_error.')';
        }
        throw new Exception($message);
    }

    return $file_contents;
}
?>