如何找到 cURL 请求最终被重定向(redirect)到的位置?

我想让 cURL 跟随重定向,但一直没能让它正常工作。我有一个字符串,想把它作为 GET 参数发送到服务器,并取得最终跳转到的 URL。

例:

string = Kobold Worker
Url = www.wowhead.com/search?q=Kobold+Worker

如果你访问该 URL,会被重定向到 “www.wowhead.com/npc=257”。我希望 cURL 把这个 URL 返回给我的 PHP 代码,这样我就可以提取出 “npc=257” 并使用它。

当前代码:

/**
 * Builds the wowhead search URL for the given name, configures a cURL handle
 * for it and returns the effective URL reported by that handle.
 *
 * NOTE: the handle is only configured here; curl_exec() is never called, so
 * no request is sent before the effective URL is read.
 *
 * @param string $name Search term; appended verbatim to the query string.
 * @return mixed Value reported by curl_getinfo() for CURLINFO_EFFECTIVE_URL.
 */
function npcID($name)
{
    $searchUrl = "http://www.wowhead.com/search?q=" . $name;

    // Option table, applied one by one in the order listed.
    $settings = array(
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1",
        CURLOPT_URL            => $searchUrl,
        CURLOPT_REFERER        => "http://www.wowhead.com",
        CURLOPT_HTTPHEADER     => array("Content-Type:application/x-www-form-urlencoded"),
        CURLOPT_FOLLOWLOCATION => TRUE,
    );

    $handle = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    return curl_getinfo($handle, CURLINFO_EFFECTIVE_URL);
}

然而这返回www.wowhead.com/search?q=Kobold+Worker而不是www.wowhead.com/npc=257

我怀疑 PHP 在外部重定向发生之前就已经返回了。我该如何解决这个问题?

要让 cURL 跟随重定向,请使用:

 // Enable CURLOPT_FOLLOWLOCATION on the handle so that cURL follows redirects.
 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); 

呃…我不认为你正在执行curl…尝试:

// Actually perform the request: call this after the curl_setopt() calls and before curl_getinfo().
curl_exec($ch);

…设置选项之后,并在curl_getinfo()调用之前。

编辑:如果你只是想知道页面重定向到了哪里,我建议采用这里的做法:只用 cURL 抓取响应头,然后从中提取 Location: 头:

 // Request $url WITHOUT following redirects and read the target of the
 // first "Location:" response header, if any.
 // Result: $location holds the header value, or null when there is none.
 $location = null;

 $ch = curl_init();
 curl_setopt($ch, CURLOPT_URL, $url);
 curl_setopt($ch, CURLOPT_HEADER, true);           // include response headers in the output
 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);  // stop at the first response
 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);   // return the response instead of printing it
 $result = curl_exec($ch);
 $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
 curl_close($ch);

 if ($result !== false) {
     // Only look at the header section so a "Location:" string inside the
     // page body can never be mistaken for the header.
     $headers = substr($result, 0, $headerSize);

     // Anchored to the start of a line so e.g. "Content-Location:" is not
     // matched; header names are case-insensitive, hence the "i" flag.
     if (preg_match('~^Location:\s*(.*)$~mi', $headers, $match)) {
         $location = trim($match[1]);
     }
 }

在 cURL 初始化代码中添加这一行:

 // Turn on CURLOPT_FOLLOWLOCATION while setting up the handle.
 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); 

并在 curl_close 之前调用 curl_getinfo:

 // Read the handle's effective URL; this has to happen before curl_close().
 $redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL ); 

例如:

 // Fetch $url while following redirects, then read the URL the handle
 // ended up at. $html receives the response body, $redirectURL the
 // effective URL reported by cURL.
 $ch = curl_init($url);

 // Options are applied one at a time, in the order listed.
 $curlOptions = array(
     CURLOPT_HEADER         => false,   // body only, no response headers
     CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13',
     CURLOPT_RETURNTRANSFER => true,    // return the response instead of printing it
     CURLOPT_BINARYTRANSFER => true,
     CURLOPT_SSL_VERIFYPEER => false,   // NOTE: turns off TLS peer verification
     CURLOPT_FOLLOWLOCATION => true,    // follow redirects
     CURLOPT_CONNECTTIMEOUT => 0,
     CURLOPT_TIMEOUT        => 60,
 );
 foreach ($curlOptions as $curlOption => $curlValue) {
     curl_setopt($ch, $curlOption, $curlValue);
 }

 $html = curl_exec($ch);
 // Must be read before the handle is closed.
 $redirectURL = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
 curl_close($ch);

上面的答案在我的一台服务器上不起作用,原因与 open_basedir 的设置有关,所以我做了一些改动。下面的代码在我所有的服务器上都能正常工作。

 // Request $url without CURLOPT_FOLLOWLOCATION and work out the redirect
 // target by scanning the raw response for a "Location:" line.
 $ch = curl_init();
 curl_setopt($ch, CURLOPT_URL, $url);
 curl_setopt($ch, CURLOPT_HEADER, true);
 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
 curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
 $a = curl_exec($ch);
 curl_close($ch);

 // the returned headers (one response line per element)
 $headers = explode("\n", $a);

 // if there is no redirection this will be the final url
 $redir = $url;

 // loop through the lines and check for a Location: string
 // (the match is case-sensitive)
 foreach ($headers as $header) {
     // if we find the Location header strip it and fill the redir var
     if (strpos($header, "Location:") !== false) {
         $redir = trim(str_replace("Location:", "", $header));
         break;
     }
 }

 // do whatever you want with the result
 // (fixed: the original echoed the bare word `redir` instead of $redir)
 echo $redir;

这里被采纳的答案还不错,但它区分大小写,而且没有处理相对路径的 Location: 头(有些网站会这样做),也没有考虑页面正文中本身就可能包含 “Location:” 字样的情况(例如目前的 zillow)。

下面的写法有点粗糙,但做了几处快速修改,让它更智能一些:

 /**
  * Resolves a single level of HTTP redirection for a URL.
  *
  * The URL is requested without following redirects. If the response status
  * is 3xx and carries a Location header, that target is returned; a target
  * starting with "/" is made absolute using the scheme/host/port of $url.
  * In every other case (request failure, non-3xx status, no Location header)
  * the original $url is returned unchanged.
  *
  * Only one hop is resolved; the returned URL may itself redirect again.
  *
  * @param string $url URL to probe.
  * @return string Redirect target, or $url when no redirect was found.
  */
 function getOriginalURL($url)
 {
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_HEADER, true);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
     $result = curl_exec($ch);
     $httpStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
     $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
     curl_close($ch);

     // request failed, or it's not a redirection (3XX): move along
     if ($result === false || $httpStatus < 300 || $httpStatus >= 400) {
         return $url;
     }

     // Search the header section only, so that text in the response body
     // cannot be mistaken for the header. The pattern is anchored to the
     // start of a line so "Content-Location:" does not match either.
     $headers = substr($result, 0, $headerSize);
     if (!preg_match('/^location:\s*(.*)$/mi', $headers, $r)) {
         return $url;
     }

     $location = trim($r[1]);
     if ($location === '') {
         return $url;
     }

     // absolute (or path-relative) target: hand it back as-is
     if ($location[0] !== '/') {
         return $location;
     }

     // parse_url() only sets the keys that are present in $url (and may
     // return false), so every component is checked before use.
     $urlParts = parse_url($url);

     // protocol-relative target ("//host/path"): only the scheme is missing;
     // fall back to http when $url itself carries no scheme
     if (substr($location, 0, 2) === '//') {
         $scheme = !empty($urlParts['scheme']) ? $urlParts['scheme'] : 'http';
         return $scheme . ':' . $location;
     }

     // root-relative target: prepend scheme://host[:port] of the request URL
     $baseURL = '';
     if (!empty($urlParts['scheme'])) {
         $baseURL = $urlParts['scheme'] . '://';
     }
     if (!empty($urlParts['host'])) {
         $baseURL .= $urlParts['host'];
     }
     if (!empty($urlParts['port'])) {
         $baseURL .= ':' . $urlParts['port'];
     }

     return $baseURL . $location;
 }

请注意,这仍然只能处理一层(深度为 1)的重定向。要继续深入,你需要实际获取内容并逐级跟随重定向。

有时你需要获取 HTTP 头,但同时又不想把这些头返回给调用方。

这个框架使用递归来处理 cookie 和 HTTP 重定向。这里的主要思路是避免把 HTTP 头返回给客户端代码。

你可以在此基础上构建一个非常强大的 curl 类,例如添加 POST 功能等。

 <?php
 /**
  * Small static cURL wrapper for GET requests that returns only the response
  * body and deals with HTTP redirects itself, so callers never see headers.
  *
  * When PHP settings permit it, redirects are followed by cURL
  * (CURLOPT_FOLLOWLOCATION). Otherwise get() detects a redirect status,
  * reads the Location header with a second request and calls itself again.
  *
  * The cookie file and user agent are assigned in the constructor, so an
  * instance has to be created once before get() uses those values.
  *
  * Limitation: a relative Location value is passed on unchanged.
  */
 class curl
 {
     static private $cookie_file = '';
     static private $user_agent = '';
     static private $max_redirects = 10;
     static private $followlocation_allowed = true;

     /**
      * Sets the shared cookie file / user agent and detects whether
      * CURLOPT_FOLLOWLOCATION may be used.
      *
      * @throws Exception When the cookie file is missing or not writable.
      */
     function __construct()
     {
         // set a file to store cookies
         self::$cookie_file = 'cookies.txt';

         // set some general User Agent
         self::$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';

         if ( ! file_exists(self::$cookie_file) || ! is_writable(self::$cookie_file)) {
             throw new Exception('Cookie file missing or not writable.');
         }

         // check for PHP settings that prevent
         // correct functioning of CURLOPT_FOLLOWLOCATION
         if (ini_get('open_basedir') != '' || ini_get('safe_mode') == 'On') {
             self::$followlocation_allowed = false;
         }
     }

     /**
      * Main method for GET requests.
      *
      * @param string   $url            URI to get
      * @param int|null $redirects_left Manual redirects still allowed for this
      *                                 request; null (default) starts with the
      *                                 class-wide maximum. Used by the recursion.
      * @return string request's body
      * @throws Exception On a cURL error, a redirect without a Location header,
      *                   or when the redirect limit is exceeded.
      */
     static public function get($url, $redirects_left = null)
     {
         // The budget is tracked per top-level call. The original decremented
         // the shared static counter, so the limit was never restored and
         // was eventually hit by unrelated requests.
         if ($redirects_left === null) {
             $redirects_left = self::$max_redirects;
         }

         $process = curl_init($url);
         self::_set_basic_options($process);

         // this function is in charge of output request's body
         // so DO NOT include HTTP headers
         curl_setopt($process, CURLOPT_HEADER, 0);

         if (self::$followlocation_allowed) {
             // if PHP settings allow it use AUTOMATIC REDIRECTION
             curl_setopt($process, CURLOPT_FOLLOWLOCATION, true);
             curl_setopt($process, CURLOPT_MAXREDIRS, self::$max_redirects);
         } else {
             curl_setopt($process, CURLOPT_FOLLOWLOCATION, false);
         }

         $return = curl_exec($process);
         if ($return === false) {
             // read the error before releasing the handle
             $error = curl_error($process);
             curl_close($process);
             throw new Exception('Curl error: ' . $error);
         }

         // test for redirection HTTP codes
         $code = curl_getinfo($process, CURLINFO_HTTP_CODE);
         curl_close($process);

         if (in_array($code, array(301, 302, 303, 307, 308), true)) {
             if ($redirects_left <= 0) {
                 throw new Exception('Max redirections reached trying to get: ' . $url);
             }

             // go to extract new Location URI
             $location = self::_parse_redirection_header($url);

             // IMPORTANT return
             return self::get($location, $redirects_left - 1);
         }

         return $return;
     }

     /**
      * Applies the options shared by every request to a cURL handle:
      * user agent, cookie file/jar and "return the transfer as a string".
      *
      * @param resource $process cURL handle to configure
      */
     static function _set_basic_options($process)
     {
         curl_setopt($process, CURLOPT_USERAGENT, self::$user_agent);
         curl_setopt($process, CURLOPT_COOKIEFILE, self::$cookie_file);
         curl_setopt($process, CURLOPT_COOKIEJAR, self::$cookie_file);
         curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
         // curl_setopt($process, CURLOPT_VERBOSE, 1);
         // curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
         // curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
     }

     /**
      * Requests $url again with headers enabled and returns the value of
      * its Location header.
      *
      * @param string $url URI that answered with a redirect status
      * @return string Trimmed Location header value
      * @throws Exception On a cURL error or when no Location header is present.
      */
     static function _parse_redirection_header($url)
     {
         $process = curl_init($url);
         self::_set_basic_options($process);

         // NOW we need to parse HTTP headers
         curl_setopt($process, CURLOPT_HEADER, 1);

         $return = curl_exec($process);
         if ($return === false) {
             $error = curl_error($process);
             curl_close($process);
             throw new Exception('Curl error: ' . $error);
         }

         $header_size = curl_getinfo($process, CURLINFO_HEADER_SIZE);
         curl_close($process);

         // Inspect the header section only. The match is case-insensitive
         // and anchored to the start of a line so that body text or a
         // "Content-Location:" header is not picked up.
         $headers = substr($return, 0, $header_size);
         if ( ! preg_match('#^Location:\s*(.*)$#mi', $headers, $location)) {
             throw new Exception('No Location found');
         }

         return trim($location[1]);
     }
 }

您可以使用:

 // Ask the handle for its redirect URL.
 // NOTE(review): presumably this is the URL a redirect would lead to when redirects are not followed automatically; confirm against the curl_getinfo() documentation.
 $redirectURL = curl_getinfo($ch,CURLINFO_REDIRECT_URL);