|
楼主 |
发表于 2014-10-21 22:14:25
|
显示全部楼层
[mw_shl_code=php,true]<?php
// Declare the response as UTF-8 HTML. The charset parameter of Content-Type
// is written "charset=<name>"; the previous "charset:Utf-8" form is not valid.
header('Content-Type: text/html; charset=UTF-8');
/**
 * Convert the markup of an HTML table into a two-dimensional array of cell texts.
 *
 * Opening <table>/<tr>/<td> tags are dropped, closing </tr> and </td> tags are
 * turned into row and cell delimiters, every remaining tag is stripped, and
 * line breaks and spaces are removed from the text.
 *
 * Only </td> delimits a cell: <th> cells are stripped like any other tag, so a
 * row made solely of <th> cells produces an empty row.
 *
 * @param string $table HTML markup containing the table.
 * @return array List of rows, each a list of cell strings. Empty array when
 *               the markup contains no closing </tr>.
 */
function get_td_array($table) {
    // Drop the opening structural tags (case-insensitive, attributes allowed).
    $table = preg_replace("'<table[^>]*?>'si", "", $table);
    $table = preg_replace("'<tr[^>]*?>'si", "", $table);
    $table = preg_replace("'<td[^>]*?>'si", "", $table);

    // Closing tags become delimiters. Matched case-insensitively so that
    // </TD> and </TR> behave the same as the opening-tag patterns above.
    $table = str_ireplace("</tr>", "{tr}", $table);
    $table = str_ireplace("</td>", "{td}", $table);

    // Strip every remaining HTML tag.
    $table = preg_replace("'<[/!]*?[^<>]*?>'si", "", $table);

    // Remove line breaks. The class must be CR/LF; a bare [rn] would delete
    // the letters "r" and "n" from the cell text.
    $table = preg_replace("'[\r\n]+'", "", $table);

    // Remove spaces: the &nbsp; entity, a UTF-8 encoded no-break space, and
    // the plain space character.
    $table = str_replace(array("&nbsp;", "\xC2\xA0", " "), "", $table);

    // Split into rows; the piece after the last {tr} is trailing residue.
    $rows = explode('{tr}', $table);
    array_pop($rows);

    // Initialised up front so that a table without rows yields [] rather
    // than an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        // Same idea per row: the piece after the last {td} is residue.
        $td = explode('{td}', $tr);
        array_pop($td);
        $td_array[] = $td;
    }

    return $td_array;
}
// Download the HTML page.
$url = 'http://222.88.107.92/exam/query/query_detail.jsp?examid=2014102014&paperid=100094';
$s = file_get_contents($url);
if ($s === false) {
    // file_get_contents() returns false on failure; stop instead of parsing it.
    exit('Failed to download ' . $url);
}

// Convert the page from GBK to UTF-8 (the source page is treated as GBK).
$s = mb_convert_encoding($s, "UTF-8", "GBK");

// Keep only the last table of the page: from the last "<table" up to and
// including the first "</table>" that follows it.
$start = strrpos($s, '<table');
if ($start === false) {
    exit('No table found in the downloaded page');
}
$s = substr($s, $start);
$end = strpos($s, '</table>');
if ($end === false) {
    exit('The last table in the downloaded page is not closed');
}
$s = substr($s, 0, $end + 8); // 8 === strlen('</table>')

// Remove hyperlinks.
$s = preg_replace('|<a href.*?</a>|', '', $s);
// Remove specific header cells, empty centred cells, and cells whose text
// starts with "110" and ends with <br>.
$s = preg_replace('|<td align="center" width="120">图片</td>|', '', $s);
$s = preg_replace('|<td align=\'center\'></td>|', '', $s);
$s = preg_replace('|<td align="center" width="120">评阅老师</td>|', '', $s);
$s = preg_replace('|<td align=\'center\'>110(.*?)<br></td>|', '', $s);

// Output the cleaned table markup.
echo $s;

// $s already holds exactly one table, so it is passed straight to the
// parser. A second regex extraction would be redundant, and one requiring
// "<table " with a trailing space would fail on a bare "<table>" tag.
$arr = get_td_array($s);
print_r($arr);
?>
[/mw_shl_code]
|
|