Public Function ReverseString(text As String)
ReverseString = StrReverse(text)
End Function
Public Function getRootDomain(text As String)
Dim i
' Remove protocol
text = Replace(text, "http://", "")
text = Replace(text, "https://", "")
text = Replace(text, "ftp://", "")
' Remove trailing / and path
i = InStr(text, "/")
If (i > 0) Then
text = Left(text, InStr(text, "/") - 1)
End If
If Left(Right(text, 5), 1) = "." Then ' info TLD
If InStr(6, ReverseString(text), ".") > 0 Then
text = Right(text, InStr(6, ReverseString(text), ".") - 1)
Else
text = Replace(text, "www.", "")
End If
ElseIf Left(Right(text, 4), 1) = "." Then '.xxx TLD
If InStr(5, ReverseString(text), ".") > 0 Then
text = Right(text, InStr(5, ReverseString(text), ".") - 1)
Else
text = Replace(text, "www.", "")
End If
Else
'xxx.TLD.uk
If Left(Right(text, 7), 1) = "." Then
If InStr(8, ReverseString(text), ".") > 0 Then
text = Right(text, InStr(8, ReverseString(text), ".") - 1)
Else 'we don't not handle http://www.wp.pl/ case well
text = Replace(text, "www.", "")
End If
'xxx.xx.uk
ElseIf Left(Right(text, 6), 1) = "." Then
If InStr(7, ReverseString(text), ".") > 0 Then
text = Right(text, InStr(7, ReverseString(text), ".") - 1)
Else 'we don't not handle http://www.wp.pl/ case well
text = Replace(text, "www.", "")
End If
Else 'xxxx.uk
If InStr(4, ReverseString(text), ".") > 0 Then
text = Right(text, InStr(4, ReverseString(text), ".") - 1)
Else
text = Replace(text, "www.", "")
End If
End If
End If
getRootDomain = text
End Function
Tuesday, June 26, 2012
Friday, January 13, 2012
Excel URL to HOST
Excel URL to HOST. If the Host is of "www", it will be reduced to root domain.
=IF(LEFT(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),4)="www.",MID(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),5,256),LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1))
=IF(LEFT(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),4)="www.",MID(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),5,256),LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1))
Wednesday, January 11, 2012
clean text data with regex in MySQL
CREATE FUNCTION tokenizer(x VARCHAR(255))
RETURNS VARCHAR(255) DETERMINISTIC
RETURN replace(replace(replace(lower(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(x,':',' '),'~',' '),'!',' '),'@',' '),'#',' '),'$',' '),'%',' '),'*',' '),'(',' '),')',' '),'_',' '),'+',' '),'{',' '),'}',' '),'[',' '),']',' '),';',' '),' '' ',' '),':',' '),'"',' '),'<',' '),'>',' '),'?',' ')),' ','<>'),'><',''),'<>',' ');
RETURNS VARCHAR(255) DETERMINISTIC
RETURN replace(replace(replace(lower(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(x,':',' '),'~',' '),'!',' '),'@',' '),'#',' '),'$',' '),'%',' '),'*',' '),'(',' '),')',' '),'_',' '),'+',' '),'{',' '),'}',' '),'[',' '),']',' '),';',' '),' '' ',' '),':',' '),'"',' '),'<',' '),'>',' '),'?',' ')),' ','<>'),'><',''),'<>',' ');
Thursday, January 5, 2012
clean text data with regex in SQL Server
CREATE FUNCTION dbo.tokenizer
(
@string varchar(8000)
)
returns varchar(8000) AS
BEGIN
DECLARE @handle int, @result varchar(8000)
exec sp_oacreate 'vbscript.regexp', @handle output
exec sp_oasetproperty @handle, 'pattern', '([{}\(\)\^$&._%#!@=<>:;,~`\’\*\?\/\+\|\[\\\\]|\]|\-)'
exec sp_oasetproperty @handle, 'global', 'true'
exec sp_oasetproperty @handle, 'ignorecase', 0
exec sp_oamethod @handle, 'replace', @result output, @string, ' '
exec sp_oadestroy @handle
RETURN rtrim(ltrim(lower(replace(replace(replace(@result,' ','<>'),'><',''),'<>',' '))))
END
(
@string varchar(8000)
)
returns varchar(8000) AS
BEGIN
DECLARE @handle int, @result varchar(8000)
exec sp_oacreate 'vbscript.regexp', @handle output
exec sp_oasetproperty @handle, 'pattern', '([{}\(\)\^$&._%#!@=<>:;,~`\’\*\?\/\+\|\[\\\\]|\]|\-)'
exec sp_oasetproperty @handle, 'global', 'true'
exec sp_oasetproperty @handle, 'ignorecase', 0
exec sp_oamethod @handle, 'replace', @result output, @string, ' '
exec sp_oadestroy @handle
RETURN rtrim(ltrim(lower(replace(replace(replace(@result,' ','<>'),'><',''),'<>',' '))))
END
Subscribe to:
Posts (Atom)