本文目錄一覽:
- 1、php抓取網頁內容不完整
- 2、用PHP獲取網頁內容的時候獲取不完全 求能完全獲取的方法
- 3、php獲取數據為什麼curl獲取不完整
- 4、php獲取數據為什麼curl獲取不完整?而用file_get_contents能獲取完整?
php抓取網頁內容不完整
用 CURL 是可以抓取到的,可能是你的網速太慢導致超時,所以抓取不完整。可以把超時時間調大,例如用 curl_setopt($ch, CURLOPT_TIMEOUT, 360) 試試看。
用PHP獲取網頁內容的時候獲取不完全 求能完全獲取的方法
curl 獲取到的是服務器端處理後返回的代碼,也就是原始的 HTML。
curl 無法解析和執行 JS,所以得到的始終是原始代碼。
而瀏覽器在拿到服務器返回的代碼的時候, 會執行頁面加載js ,
js 會在DOM 里動態添加或修改刪除一些節點元素.
在瀏覽器中「查看元素」看到的,是經過 JS 處理之後的 HTML 內容,已經不是原始的了。
所以單純使用 curl 沒法獲取到“所見即所存”的代碼。
php獲取數據為什麼curl獲取不完整
因為當 POST 數據超過 1024 字節時,cURL 默認會先發送「Expect: 100-continue」請求頭並等待服務器回應;如果服務器沒有正確處理這個請求頭,就可能導致數據獲取不完整。所以你那段代碼需增加一項配置,把這個請求頭置空:
// Send an empty "Expect:" header so cURL does not add "Expect: 100-continue"
// and wait for the server's interim reply before sending the request body.
curl_setopt($ch, CURLOPT_HTTPHEADER, array('Expect:'));
給你一個更全面的封裝方法:
/**
 * Perform an HTTP request with cURL and return the response.
 *
 * Recognised keys in $options (each falls back to the default listed in the body):
 *   follow_local    bool    follow "Location:" redirects
 *   timeout         int     total timeout in seconds
 *   max_redirects   int     maximum number of redirects to follow
 *   binary_transfer bool    accepted for compatibility; not read by this function
 *   include_header  bool    include response headers in the returned string
 *   no_body         bool    request without a response body
 *   cookie_location string  file used both to load and to store cookies
 *   useragent       string  User-Agent header value
 *   post            mixed   when non-empty, sent as the POST body
 *   referer         string  Referer header value; skipped when empty
 *   ssl_verifypeer  mixed   value for CURLOPT_SSL_VERIFYPEER
 *   ssl_verifyhost  mixed   value for CURLOPT_SSL_VERIFYHOST
 *   headers         array   request header lines; passing this key REPLACES the
 *                           default list, including the empty "Expect:" entry
 *   auth_name       string  user name for HTTP authentication
 *   auth_pass       string  password for HTTP authentication
 *   session         mixed   when truthy, used as the Cookie header value and a
 *                           new cookie session is started
 *
 * @param string $url     URL to request.
 * @param mixed  $status  Kept for signature compatibility. It is received by
 *                        value, so the HTTP status code written to it below is
 *                        NOT visible to the caller. Declaring it as &$status
 *                        would expose it, but would break callers that pass a
 *                        literal such as null.
 * @param array  $options Overrides for the defaults described above.
 *
 * @return string|bool    Whatever curl_exec() returns with RETURNTRANSFER on
 *                        (the response as a string, or false on failure);
 *                        false as well when the cURL handle cannot be created.
 */
function req_curl($url, $status = null, $options = array())
{
    $options = array_merge(array(
        'follow_local'    => true,
        'timeout'         => 30,
        'max_redirects'   => 4,
        'binary_transfer' => false,
        'include_header'  => false,
        'no_body'         => false,
        'cookie_location' => dirname(__FILE__) . '/cookie',
        'useragent'       => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'post'            => array(),
        'referer'         => null,
        // WARNING: certificate and host-name verification are disabled by
        // default, which allows man-in-the-middle attacks. The defaults are
        // kept because existing callers may rely on them; pass
        // 'ssl_verifypeer' => true and 'ssl_verifyhost' => 2 for anything
        // that matters.
        'ssl_verifypeer'  => 0,
        'ssl_verifyhost'  => 0,
        // An empty "Expect:" header stops cURL from sending
        // "Expect: 100-continue" and waiting for the interim reply.
        'headers'         => array(
            'Expect:'
        ),
        'auth_name'       => '',
        'auth_pass'       => '',
        'session'         => false
    ), $options);
    $options['url'] = $url;

    $s = curl_init();
    if (!$s) {
        return false;
    }

    curl_setopt($s, CURLOPT_URL, $options['url']);
    curl_setopt($s, CURLOPT_HTTPHEADER, $options['headers']);
    curl_setopt($s, CURLOPT_SSL_VERIFYPEER, $options['ssl_verifypeer']);
    curl_setopt($s, CURLOPT_SSL_VERIFYHOST, $options['ssl_verifyhost']);
    curl_setopt($s, CURLOPT_TIMEOUT, $options['timeout']);
    curl_setopt($s, CURLOPT_MAXREDIRS, $options['max_redirects']);
    curl_setopt($s, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($s, CURLOPT_FOLLOWLOCATION, $options['follow_local']);
    // The same file is used to read previously stored cookies and to save new ones.
    curl_setopt($s, CURLOPT_COOKIEJAR, $options['cookie_location']);
    curl_setopt($s, CURLOPT_COOKIEFILE, $options['cookie_location']);

    if (!empty($options['auth_name']) && is_string($options['auth_name'])) {
        curl_setopt($s, CURLOPT_USERPWD, $options['auth_name'] . ':' . $options['auth_pass']);
    }

    if (!empty($options['post'])) {
        curl_setopt($s, CURLOPT_POST, true);
        curl_setopt($s, CURLOPT_POSTFIELDS, $options['post']);
        // Example: curl_setopt($s, CURLOPT_POSTFIELDS, array('username' => 'aeon', 'password' => '111111'));
    }

    if ($options['include_header']) {
        curl_setopt($s, CURLOPT_HEADER, true);
    }

    if ($options['no_body']) {
        curl_setopt($s, CURLOPT_NOBODY, true);
    }

    if ($options['session']) {
        curl_setopt($s, CURLOPT_COOKIESESSION, true);
        curl_setopt($s, CURLOPT_COOKIE, $options['session']);
    }

    curl_setopt($s, CURLOPT_USERAGENT, $options['useragent']);
    // Only set a Referer when one was supplied, instead of passing null to cURL.
    if (!empty($options['referer'])) {
        curl_setopt($s, CURLOPT_REFERER, $options['referer']);
    }

    $res = curl_exec($s);
    // Local only: see the note on $status in the header comment.
    $status = curl_getinfo($s, CURLINFO_HTTP_CODE);
    curl_close($s);

    return $res;
}
php獲取數據為什麼curl獲取不完整?而用file_get_contents能獲取完整?
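因為當 POST 數據超過 1024 字節時,cURL 默認會先發送「Expect: 100-continue」請求頭並等待服務器回應;如果服務器沒有正確處理這個請求頭,就可能導致數據獲取不完整,而 file_get_contents 不會發送這個請求頭,所以能獲取完整。你那段代碼需增加一項配置,把這個請求頭置空: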
// Send an empty "Expect:" header so cURL does not add "Expect: 100-continue"
// and wait for the server's interim reply before sending the request body.
curl_setopt($ch, CURLOPT_HTTPHEADER, array('Expect:'));
給你一個更全面的封裝方法:
/**
 * Perform an HTTP request with cURL and return the response.
 *
 * Recognised keys in $options (each falls back to the default listed in the body):
 *   follow_local    bool    follow "Location:" redirects
 *   timeout         int     total timeout in seconds
 *   max_redirects   int     maximum number of redirects to follow
 *   binary_transfer bool    accepted for compatibility; not read by this function
 *   include_header  bool    include response headers in the returned string
 *   no_body         bool    request without a response body
 *   cookie_location string  file used both to load and to store cookies
 *   useragent       string  User-Agent header value
 *   post            mixed   when non-empty, sent as the POST body
 *   referer         string  Referer header value; skipped when empty
 *   ssl_verifypeer  mixed   value for CURLOPT_SSL_VERIFYPEER
 *   ssl_verifyhost  mixed   value for CURLOPT_SSL_VERIFYHOST
 *   headers         array   request header lines; passing this key REPLACES the
 *                           default list, including the empty "Expect:" entry
 *   auth_name       string  user name for HTTP authentication
 *   auth_pass       string  password for HTTP authentication
 *   session         mixed   when truthy, used as the Cookie header value and a
 *                           new cookie session is started
 *
 * @param string $url     URL to request.
 * @param mixed  $status  Kept for signature compatibility. It is received by
 *                        value, so the HTTP status code written to it below is
 *                        NOT visible to the caller. Declaring it as &$status
 *                        would expose it, but would break callers that pass a
 *                        literal such as null.
 * @param array  $options Overrides for the defaults described above.
 *
 * @return string|bool    Whatever curl_exec() returns with RETURNTRANSFER on
 *                        (the response as a string, or false on failure);
 *                        false as well when the cURL handle cannot be created.
 */
function req_curl($url, $status = null, $options = array())
{
    $options = array_merge(array(
        'follow_local'    => true,
        'timeout'         => 30,
        'max_redirects'   => 4,
        'binary_transfer' => false,
        'include_header'  => false,
        'no_body'         => false,
        'cookie_location' => dirname(__FILE__) . '/cookie',
        'useragent'       => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'post'            => array(),
        'referer'         => null,
        // WARNING: certificate and host-name verification are disabled by
        // default, which allows man-in-the-middle attacks. The defaults are
        // kept because existing callers may rely on them; pass
        // 'ssl_verifypeer' => true and 'ssl_verifyhost' => 2 for anything
        // that matters.
        'ssl_verifypeer'  => 0,
        'ssl_verifyhost'  => 0,
        // An empty "Expect:" header stops cURL from sending
        // "Expect: 100-continue" and waiting for the interim reply.
        'headers'         => array(
            'Expect:'
        ),
        'auth_name'       => '',
        'auth_pass'       => '',
        'session'         => false
    ), $options);
    $options['url'] = $url;

    $s = curl_init();
    if (!$s) {
        return false;
    }

    curl_setopt($s, CURLOPT_URL, $options['url']);
    curl_setopt($s, CURLOPT_HTTPHEADER, $options['headers']);
    curl_setopt($s, CURLOPT_SSL_VERIFYPEER, $options['ssl_verifypeer']);
    curl_setopt($s, CURLOPT_SSL_VERIFYHOST, $options['ssl_verifyhost']);
    curl_setopt($s, CURLOPT_TIMEOUT, $options['timeout']);
    curl_setopt($s, CURLOPT_MAXREDIRS, $options['max_redirects']);
    curl_setopt($s, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($s, CURLOPT_FOLLOWLOCATION, $options['follow_local']);
    // The same file is used to read previously stored cookies and to save new ones.
    curl_setopt($s, CURLOPT_COOKIEJAR, $options['cookie_location']);
    curl_setopt($s, CURLOPT_COOKIEFILE, $options['cookie_location']);

    if (!empty($options['auth_name']) && is_string($options['auth_name'])) {
        curl_setopt($s, CURLOPT_USERPWD, $options['auth_name'] . ':' . $options['auth_pass']);
    }

    if (!empty($options['post'])) {
        curl_setopt($s, CURLOPT_POST, true);
        curl_setopt($s, CURLOPT_POSTFIELDS, $options['post']);
        // Example: curl_setopt($s, CURLOPT_POSTFIELDS, array('username' => 'aeon', 'password' => '111111'));
    }

    if ($options['include_header']) {
        curl_setopt($s, CURLOPT_HEADER, true);
    }

    if ($options['no_body']) {
        curl_setopt($s, CURLOPT_NOBODY, true);
    }

    if ($options['session']) {
        curl_setopt($s, CURLOPT_COOKIESESSION, true);
        curl_setopt($s, CURLOPT_COOKIE, $options['session']);
    }

    curl_setopt($s, CURLOPT_USERAGENT, $options['useragent']);
    // Only set a Referer when one was supplied, instead of passing null to cURL.
    if (!empty($options['referer'])) {
        curl_setopt($s, CURLOPT_REFERER, $options['referer']);
    }

    $res = curl_exec($s);
    // Local only: see the note on $status in the header comment.
    $status = curl_getinfo($s, CURLINFO_HTTP_CODE);
    curl_close($s);

    return $res;
}
原創文章,作者:小藍,如若轉載,請註明出處:https://www.506064.com/zh-hant/n/282995.html