Thursday, June 9, 2011

MS SQL, Parse URL procedure

Very often, I need to parse URLs into different parts, here is a quick funciton I wrote to do that. Obviously, you can make it more robust if you want.

-- Right now, it support the following parsing of a URL
-- root, host, protocol, path, pathn (where n is a number indicating the nth parts of the path), pathcoun, pathpara, port

CREATE FUNCTION [dbo].[parseURL]
(@URL varchar(500),
@type nvarchar(50) = 'root')
RETURNS varchar(500)
AS
BEGIN

declare @retval nvarchar(500)
declare @strURL nvarchar(500)
declare @path nvarchar(500)
declare @level int
declare @port nvarchar(100) ='80'
declare @tmp nvarchar(500)
declare @tmp1 nvarchar(500)

select @strURL = LOWER(@URL)

-- Fix some bad URLs in the system
IF CHARINDEX('http://http:.',@strURL) > 0     select @strURL = replace(@strURL,'http://http:.','http://')
IF @strURL='http://www.' or @strURL='http://www'     select @strURL =NULL

select @type = LOWER(@type)

if @type='protocol'
begin
    IF CHARINDEX('://',@strURL) > 0     return  left(@strURL,charindex('://',@strURL)-1)
    return 'http'
end
  
-- Does not handle URLs with user name and password
IF CHARINDEX('://',@strURL) > 0            SELECT @strURL = substring(@strURL,charindex('://',@strURL)+3, 500)

-- Remove everything after "/" if one exists
IF CHARINDEX('/',@strURL) > 0
begin
    SELECT @path = substring(@strURL,CHARINDEX('/',@strURL)+1, 500)
    SELECT @strURL = LEFT(@strURL,CHARINDEX('/',@strURL)-1)
end

if @type='host' return @strURL
IF CHARINDEX(':',@strURL) > 0 select @port = SUBSTRING(@strURL,CHARINDEX(':',@strURL)+1, 100)
if @type='port' return @port


-- handle cases like .co.uk,
-- 81-17-241-170.webhostingireland.ie


if @type='root'
begin
IF SUBSTRING(@strURL, len(@strURL)-2,1)='.'
    begin
        if len(PARSENAME(@strURL,2))<=3
             SELECT @retval = PARSENAME(@strURL,3) + '.' + PARSENAME(@strURL,2) + '.' + PARSENAME(@strURL,1)  
        else
             SELECT @retval = PARSENAME(@strURL,2) + '.' + PARSENAME(@strURL,1)
    end
ELSE
    SELECT @retval = PARSENAME(@strURL,2) + '.' + PARSENAME(@strURL,1)

RETURN @retval
end

if @type='path' return @path

--path1, path2, path3...
if left(@type,4)='path'
begin
    select @tmp =@path
  
    --deal with query strings
    if charindex('?',@tmp)>0
    begin
        if @type='pathparam' return substring(@tmp, CHARINDEX('?', @tmp), LEN(@tmp) )
        select @tmp= SUBSTRING(@tmp, 1,  CHARINDEX('?', @tmp)-1)
    end
    else if @type='pathparam' return ''
  
    -- remove leading and trailing /
    if LEFT(@tmp,1)='/' select @tmp = substring(@tmp, 2, LEN(@tmp) )
    if right(@tmp,1)='/' select @tmp = substring(@tmp, 1, LEN(@tmp)-1)
  
    select @tmp = REPLACE(@tmp, '.','^^^')      
    select @tmp = REPLACE(@tmp, '/','.')
    select @tmp = REVERSE(@tmp)
  
    -- try to count the number of parts in a path
    if @type='pathcount'
        begin
            if isnull(PARSENAME(@tmp,1),'NULL')='NULL' return 0
            if isnull(PARSENAME(@tmp,2),'NULL')='NULL' return 1
            if isnull(PARSENAME(@tmp,3),'NULL')='NULL' return 2
            if isnull(PARSENAME(@tmp,4),'NULL')='NULL' return 3
            if isnull(PARSENAME(@tmp,5),'NULL')='NULL' return 4
            if isnull(PARSENAME(@tmp,6),'NULL')='NULL' return 5
            if isnull(PARSENAME(@tmp,7),'NULL')='NULL' return 6
            if isnull(PARSENAME(@tmp,8),'NULL')='NULL' return 7
            if isnull(PARSENAME(@tmp,9),'NULL')='NULL' return 8
            if isnull(PARSENAME(@tmp,10),'NULL')='NULL' return 9
        end
    else
        begin
            select @level = cast(substring(LOWER(@type),5,10) as int)
            select @tmp = reverse(parsename (@tmp, @level))
            return REPLACE(@tmp, '^^^', '.')
        end
end


return 'BAD REQUEST'

END

GO