To get an idea of how the URLs were transformed, I perused a few hundred titles from my forum. I noticed that some URLs looked "strange" without the apostrophes, so I modified the function below to pretty them up a little, e.g.
- 1-000-000 -> 1000000
i-ll -> i-will
& (previously dropped) -> and
they-ve -> they-have
- Code: Select all
/**
 * Turns a free-text title into a lowercase, hyphen-separated URL slug.
 *
 * Processing order matters:
 *  1. bracketed tags such as "[Solved]" are removed;
 *  2. " & " becomes " and ", thousands separators and the dots inside
 *     upper-case abbreviations are dropped (1,000 -> 1000, U.S.A. -> USA);
 *  3. German umlauts/sharp s and common accented Latin letters are
 *     transliterated to ASCII;
 *  4. the text is lowercased and English contractions are prettied up
 *     (mary's -> marys, can't -> cant, i've -> i have, i'll -> i will);
 *  5. every remaining run of non-alphanumeric characters becomes a single
 *     hyphen, and leading/trailing hyphens are stripped.
 *
 * NOTE(review): the accented characters below are written as literals, so
 * this file must be saved in the same encoding as the incoming titles
 * (presumably UTF-8) - confirm against the forum's charset.
 *
 * @param string $url Raw title text to convert.
 * @return string The slug; an empty string when nothing alphanumeric remains.
 */
function format_url($url)
{
    $url = (string) $url;

    // Remove bracketed tags; lazy quantifier so each [tag] is removed on its own.
    $url = preg_replace('/\[.*?\]/', '', $url);

    $url = str_replace(' & ', ' and ', $url);

    // Lookarounds (instead of consuming capture groups) so that EVERY
    // separator in a run is removed: "U.S.A." -> "USA.", "1,000,000" -> "1000000".
    $url = preg_replace('/(?<=[0-9]),(?=[0-9])/', '', $url);
    $url = preg_replace('/(?<=[A-Z])\.(?=[A-Z])/', '', $url);

    // Double-quoted so these are real line breaks, not literal backslashes.
    $url = str_replace(array('"', "\r\n", "\n"), '-', $url);

    // Array-form strtr replaces whole substrings, so multi-byte (UTF-8)
    // characters are handled correctly; the three-argument form works
    // byte-by-byte and corrupts them.
    $transliterations = array(
        'ß' => 'ss',
        'ö' => 'oe', 'Ö' => 'oe',
        'ä' => 'ae', 'Ä' => 'ae',
        'ü' => 'ue', 'Ü' => 'ue',
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Å' => 'A',
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'å' => 'a',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ø' => 'O',
        'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ø' => 'o',
        'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'Ç' => 'C', 'ç' => 'c',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u',
        'ÿ' => 'y',
        'Ñ' => 'N', 'ñ' => 'n',
    );
    $url = strtr($url, $transliterations);

    $url = strtolower($url);

    // Contractions: applied in this order, after lowercasing.
    $url = str_replace(
        array("'s", "'t", "'ve", "'ll"),
        array('s', 't', ' have', ' will'),
        $url
    );

    // Collapse every run of non-alphanumerics into one hyphen, then strip
    // hyphens from both ends.
    $url = preg_replace('/[^a-z0-9]+/', '-', $url);

    return trim($url, '-');
}

English |
French


