在工作中,經常需要對網頁上的表格內容進行處理。但是,由於表格製作過程中的隨意性,跨行、跨列的情況經常發生,所以我寫了這幾個函數,用來獲取表格的內容。程序中重要的地方已加了註解,在此不再重複說明。經過測試,結果正確,所以拿出來供大家共享。
<?php
//作者: 王朋武
//日期: 2005,3,31
//目的: 獲取網頁上(任意跨行跨列的)表格中的內容
/**
 * Splits one flattened table row into pieces that each start at a "<td" tag.
 *
 * Index 0 holds whatever precedes the first cell (normally an empty string),
 * so real cells are numbered from 1. Joining the pieces with implode('', ...)
 * reproduces the row unchanged.
 *
 * @param string $row one table row, e.g. '<td>a</td><td>b</td>'
 * @return array list of string pieces
 */
function fun_row_split_cells($row)
{
    // A NUL byte is used as the temporary separator because it cannot occur in cell text.
    return explode("\x00", preg_replace('/<td\b/i', "\x00" . '$0', (string) $row));
}

/**
 * Expands ONE rowspan cell of a flattened table and returns the new table text.
 *
 * The input holds one table row per line ("\n"-separated); every row is a run
 * of <td ...>text</td> cells. Rows are scanned from the last to the first and
 * cells from left to right. The first cell whose opening tag carries a rowspan
 * attribute is expanded via fun_add_row_td() and the function returns
 * immediately, so callers invoke it repeatedly until nothing changes.
 *
 * Cells must be scanned left to right: when several neighbouring cells of one
 * row span rows, expanding the right-most first could target a column that the
 * next row does not have yet. Example (first six cells of row 2 have rowspan=2):
 *
 *   +----+----+----+----+----+----+----+
 *   +----+----+----+----+----+----+----+   source row stores 7 cells
 *   +-----------------------------+----+   source row stores 2 cells
 *   +----------------------------------+   source row stores 1 cell
 *
 * Limitation: the column of a cell is taken from its index in the row text, so
 * tables in which row spans that start in different rows overlap can still be
 * misaligned.
 *
 * A missing, zero or non-numeric rowspan value is treated as 1, which simply
 * removes the attribute.
 *
 * @param string $l_str flattened table text
 * @return string table text with one rowspan expanded; the input itself when
 *                no cell tag has a rowspan attribute
 */
function fun_proc_rowspan($l_str)
{
    $l_str = (string) $l_str;
    $rows = explode("\n", $l_str);

    for ($i = count($rows) - 1; $i >= 0; $i--) {
        if (trim($rows[$i]) === '') {
            continue;
        }

        $cells = fun_row_split_cells($rows[$i]);
        $cellCount = count($cells);

        for ($j = 1; $j < $cellCount; $j++) {
            // Only look at the opening tag so that the word "rowspan" in cell text is ignored.
            if (!preg_match('/^<td\b[^>]*>/i', $cells[$j], $tag)) {
                continue;
            }
            if (!preg_match('/\srowspan\b(?:\s*=\s*["\']?(\d+))?/i', $tag[0], $attr)) {
                continue;
            }

            $span = isset($attr[1]) ? max(1, (int) $attr[1]) : 1;

            // Cell text is everything between the opening tag and </td>; it may be empty.
            $inner = (string) substr($cells[$j], strlen($tag[0]));
            $close = stripos($inner, '</td>');
            $content = ($close === false) ? $inner : (string) substr($inner, 0, $close);

            $rows = fun_add_row_td($rows, $i, $j, $span, $content);

            return implode("\n", $rows);
        }
    }

    // Nothing to expand: hand back the table untouched.
    return $l_str;
}

/**
 * Removes the rowspan attribute from one cell and copies its content into the
 * rows that the cell spans.
 *
 * @param array  $l_a      list of row strings (sequential keys, as produced by explode)
 * @param int    $r        index of the row that holds the spanning cell
 * @param int    $l        1-based index of the spanning cell inside that row
 * @param int    $add_n    number of rows spanned, including row $r
 * @param string $add_cont cell text to repeat in the following rows
 * @return array the updated list of rows; unchanged when $r or $l is out of range
 */
function fun_add_row_td($l_a, $r, $l, $add_n, $add_cont)
{
    $r = (int) $r;
    $l = (int) $l;
    if (!isset($l_a[$r]) || $l < 1) {
        return $l_a;
    }

    // A span never reaches past the last row that is present.
    $end = min(count($l_a), $r + max(1, (int) $add_n));

    for ($i = $r; $i < $end; $i++) {
        $cells = fun_row_split_cells($l_a[$i]);

        if ($i === $r) {
            // First row: keep the cell, drop only its rowspan attribute.
            if (isset($cells[$l]) && preg_match('/^<td\b[^>]*>/i', $cells[$l], $tag)) {
                $open = preg_replace(
                    '/\s+rowspan\b(?:\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*))?\s*/i',
                    ' ',
                    $tag[0]
                );
                $open = preg_replace('/\s+>$/', '>', $open);
                $cells[$l] = $open . (string) substr($cells[$l], strlen($tag[0]));
            }
        } else {
            // Following rows: insert a copy in front of cell $l, or append it
            // when the row has fewer cells; every existing cell is kept.
            $pos = min($l, count($cells));
            array_splice($cells, $pos, 0, ['<td>' . $add_cont . '</td>']);
        }

        $l_a[$i] = implode('', $cells);
    }

    return $l_a;
}
/**
 * Splits one flattened table row into pieces that each start at a "<td" tag.
 *
 * Index 0 holds whatever precedes the first cell (normally an empty string),
 * so real cells are numbered from 1. Joining the pieces with implode('', ...)
 * reproduces the row unchanged.
 *
 * @param string $row one table row, e.g. '<td>a</td><td>b</td>'
 * @return array list of string pieces
 */
function fun_col_split_cells($row)
{
    // A NUL byte is used as the temporary separator because it cannot occur in cell text.
    return explode("\x00", preg_replace('/<td\b/i', "\x00" . '$0', (string) $row));
}

/**
 * Expands ONE colspan cell of a flattened table and returns the new table text.
 *
 * The input holds one table row per line ("\n"-separated); every row is a run
 * of <td ...>text</td> cells. Rows are scanned top to bottom and cells left to
 * right (the order must not be reversed). The first cell whose opening tag
 * carries a colspan attribute is expanded via fun_add_col_td() and the
 * function returns immediately, so callers invoke it repeatedly until nothing
 * changes.
 *
 * A missing, zero or non-numeric colspan value is treated as 1, which simply
 * removes the attribute.
 *
 * @param string $l_str flattened table text
 * @return string table text with one colspan expanded; the input itself when
 *                no cell tag has a colspan attribute
 */
function fun_proc_colspan($l_str)
{
    $l_str = (string) $l_str;
    $rows = explode("\n", $l_str);

    foreach ($rows as $i => $row) {
        if (trim($row) === '') {
            continue;
        }

        $cells = fun_col_split_cells($row);
        $cellCount = count($cells);

        for ($j = 1; $j < $cellCount; $j++) {
            // Only look at the opening tag so that the word "colspan" in cell text is ignored.
            if (!preg_match('/^<td\b[^>]*>/i', $cells[$j], $tag)) {
                continue;
            }
            if (!preg_match('/\scolspan\b(?:\s*=\s*["\']?(\d+))?/i', $tag[0], $attr)) {
                continue;
            }

            $span = isset($attr[1]) ? max(1, (int) $attr[1]) : 1;

            // Cell text is everything between the opening tag and </td>; it may be empty.
            $inner = (string) substr($cells[$j], strlen($tag[0]));
            $close = stripos($inner, '</td>');
            $content = ($close === false) ? $inner : (string) substr($inner, 0, $close);

            $rows = fun_add_col_td($rows, $i, $j, $span, $content);

            return implode("\n", $rows);
        }
    }

    // Nothing to expand: hand back the table untouched.
    return $l_str;
}

/**
 * Removes the colspan attribute from one cell and inserts ($add_n - 1) copies
 * of it directly after the cell.
 *
 * The copies reuse the cell's opening tag (minus colspan), so a cell that also
 * has a rowspan attribute keeps that attribute on every duplicated column.
 *
 * @param array  $l_a      list of row strings
 * @param int    $r        index of the row that holds the spanning cell
 * @param int    $l        1-based index of the spanning cell inside that row
 * @param int    $add_n    number of columns spanned, including the cell itself
 * @param string $add_cont cell text to repeat in the added cells
 * @return array the updated list of rows; unchanged when the cell is not found
 */
function fun_add_col_td($l_a, $r, $l, $add_n, $add_cont)
{
    $l = (int) $l;
    if (!isset($l_a[$r]) || $l < 1) {
        return $l_a;
    }

    $cells = fun_col_split_cells($l_a[$r]);
    if (!isset($cells[$l]) || !preg_match('/^<td\b[^>]*>/i', $cells[$l], $tag)) {
        return $l_a;
    }

    // Drop every colspan attribute from the opening tag, keep all other attributes.
    $open = preg_replace(
        '/\s+colspan\b(?:\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*))?\s*/i',
        ' ',
        $tag[0]
    );
    $open = preg_replace('/\s+>$/', '>', $open);

    $copies = str_repeat($open . $add_cont . '</td>', max(0, (int) $add_n - 1));

    // Appending to the piece itself also covers the case where the cell is the last one in the row.
    $cells[$l] = $open . (string) substr($cells[$l], strlen($tag[0])) . $copies;
    $l_a[$r] = implode('', $cells);

    return $l_a;
}
// Driver: read test.htm, flatten its table into one text line per row, expand
// every colspan/rowspan, then print each row with its cells separated by spaces.
$l_str = file_get_contents("test.htm"); // fetch the page content
if ($l_str === false) {
    exit("Unable to read test.htm");
}

// Keep only what lies between the last "<table" and the first "</table>" after it.
$l_start = strripos($l_str, '<table');
if ($l_start !== false) {
    $l_str = (string) substr($l_str, $l_start);
}
$l_end = stripos($l_str, '</table>');
if ($l_end !== false) {
    $l_str = (string) substr($l_str, 0, $l_end);
}

$l_str = str_replace(["\r", "\n"], '', $l_str);            // drop \r and \n
$l_str = preg_replace('/<\/tr\s*>/i', "\n", $l_str);       // one table row per text line
$l_str = preg_replace('/<(\/?)th\b/i', '<$1td', $l_str);   // header cells count as ordinary cells
$l_str = strip_tags($l_str, "<td>");                       // keep only <td> and </td>

// Each call expands a single span, so repeat until none is left. The progress
// check stops the loop if a call leaves the text unchanged.
while (preg_match('/<td\b[^>]*\scolspan\b/i', $l_str)) {
    $l_next = fun_proc_colspan($l_str);
    if ($l_next === $l_str) {
        break;
    }
    $l_str = $l_next;
}
while (preg_match('/<td\b[^>]*\srowspan\b/i', $l_str)) {
    $l_next = fun_proc_rowspan($l_str);
    if ($l_next === $l_str) {
        break;
    }
    $l_str = $l_next;
}

// Table processing is complete at this point; what follows is output only.
$l_str = str_ireplace("</td>", "<br>", $l_str); // mark the end of every cell
$l_str = strip_tags($l_str, "<br>");
$l_a = explode("\n", $l_str);
foreach ($l_a as $l_row) {
    $l_row = trim($l_row);
    if ($l_row === '') {
        continue;
    }
    foreach (explode("<br>", $l_row) as $val) {
        echo $val." "; // a space between the columns of a row
    }
    echo "<br>"; // one table row per displayed line
}
//附測試用例和測試結果:
/***********測試用表格*************************************************************
<!doctype html public "-//w3c//dtd html 4.01 transitional//en">
<html>
<head>
<title>untitled document</title>
<meta http-equiv="content-type" content="text/html; charset=gb2312">
</head>
<body>
<table width="200" border="1" cellspacing="0" cellpadding="0">
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
<td>5</td>
<td>6</td>
<td>7</td>
</tr>
<tr>
<td>a1</td>
<td rowspan="7">22</td>
<td>a2</td>
<td colspan="4">a3</td>
</tr>
<tr>
<td>b1</td>
<td colspan="2">b2</td>
<td>b3</td>
<td>b4</td>
<td rowspan="4">33</td>
</tr>
<tr>
<td>c1</td>
<td>c2</td>
<td>c3</td>
<td>c4</td>
<td rowspan="2">44</td>
</tr>
<tr>
<td>d1</td>
<td>d2</td>
<td>d3</td>
<td>d4</td>
</tr>
<tr>
<td>e1</td>
<td>e2</td>
<td>e3</td>
<td>e4</td>
<td>e5</td>
</tr>
<tr>
<td>f1</td>
<td>f2</td>
<td colspan="4">f3</td>
</tr>
<tr>
<td>g1</td>
<td colspan="5">g2</td>
</tr>
<tr>
<td colspan="7">h1</td>
</tr>
<tr>
<td>i1</td>
<td colspan="3">i2</td>
<td>i3</td>
<td>i4</td>
<td>i5</td>
</tr>
<tr>
<td>j1</td>
<td colspan="3">j2</td>
<td>j3</td>
<td colspan="2">j4</td>
</tr>
<tr>
<td colspan="6">k1</td>
<td>k2</td>
</tr>
<tr>
<td colspan="6">l1</td>
<td>l2</td>
</tr>
<tr>
<td colspan="5">m1</td>
<td>m2</td>
<td>m3</td>
</tr>
</table>
</body>
</html>
*********************************************************************************/
/**************操作結果的表格***********************************************
<table border='1'>
<tr> <td>1</td> <td>2</td> <td>3</td> <td>4</td> <td>5</td> <td>6</td> <td>7</td> </tr>
<tr><td>a1</td> <td ="7">22</td> <td>a2</td> <td ="4">a3</td> <td>a3</td><td>a3</td><td>a3</td></tr>
<tr><td>b1</td> <td>22</td><td ="2">b2</td> <td>b2</td><td>b3</td> <td>b4</td> <td ="4">33</td> </tr>
<tr><td>c1</td> <td>22</td><td>c2</td> <td>c3</td> <td>c4</td> <td ="2">44</td> <td>33</td></tr>
<tr><td>d1</td> <td>22</td><td>d2</td> <td>d3</td> <td>d4</td> <td>44</td><td>33</td></tr>
<tr><td>e1</td> <td>22</td><td>e2</td> <td>e3</td> <td>e4</td> <td>e5</td> <td>33</td></tr>
<tr><td>f1</td> <td>22</td><td>f2</td> <td ="4">f3</td> <td>f3</td><td>f3</td><td>f3</td></tr>
<tr><td>g1</td> <td>22</td><td ="5">g2</td> <td>g2</td><td>g2</td><td>g2</td><td>g2</td></tr>
<tr><td ="7">h1</td> <td>h1</td><td>h1</td><td>h1</td><td>h1</td><td>h1</td><td>h1</td></tr>
<tr><td>i1</td> <td ="3">i2</td> <td>i2</td><td>i2</td><td>i3</td> <td>i4</td> <td>i5</td> </tr>
<tr><td>j1</td> <td ="3">j2</td> <td>j2</td><td>j2</td><td>j3</td> <td ="2">j4</td> <td>j4</td></tr>
<tr><td ="6">k1</td> <td>k1</td><td>k1</td><td>k1</td><td>k1</td><td>k1</td><td>k2</td> </tr>
<tr><td ="6">l1</td> <td>l1</td><td>l1</td><td>l1</td><td>l1</td><td>l1</td><td>l2</td> </tr>
<tr><td ="5">m1</td> <td>m1</td><td>m1</td><td>m1</td><td>m1</td><td>m2</td> <td>m3</td> </tr>
</table>
******************************************************************/
測試用表:
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
| a1 | 22 | a2 | a3 |
| b1 | b2 | b3 | b4 | 33 |
| c1 | c2 | c3 | c4 | 44 |
| d1 | d2 | d3 | d4 |
| e1 | e2 | e3 | e4 | e5 |
| f1 | f2 | f3 |
| g1 | g2 |
| h1 |
| i1 | i2 | i3 | i4 | i5 |
| j1 | j2 | j3 | j4 |
| k1 | k2 |
| l1 | l2 |
| m1 | m2 | m3 |
測試結果的表:
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
| a1 | 22 | a2 | a3 | a3 | a3 | a3 |
| b1 | 22 | b2 | b2 | b3 | b4 | 33 |
| c1 | 22 | c2 | c3 | c4 | 44 | 33 |
| d1 | 22 | d2 | d3 | d4 | 44 | 33 |
| e1 | 22 | e2 | e3 | e4 | e5 | 33 |
| f1 | 22 | f2 | f3 | f3 | f3 | f3 |
| g1 | 22 | g2 | g2 | g2 | g2 | g2 |
| h1 | h1 | h1 | h1 | h1 | h1 | h1 |
| i1 | i2 | i2 | i2 | i3 | i4 | i5 |
| j1 | j2 | j2 | j2 | j3 | j4 | j4 |
| k1 | k1 | k1 | k1 | k1 | k1 | k2 |
| l1 | l1 | l1 | l1 | l1 | l1 | l2 |
| m1 | m1 | m1 | m1 | m1 | m2 | m3 |
中國最大的web開發資源網站及技術社區,