Excel URL to HOST. If the Host is of "www", it will be reduced to root domain.
=IF(LEFT(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),4)="www.",MID(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),5,256),LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1))
Friday, January 13, 2012
Wednesday, January 11, 2012
clean text data with regex in MySQL
CREATE FUNCTION tokenizer(x VARCHAR(255))
RETURNS VARCHAR(255) DETERMINISTIC
RETURN replace(replace(replace(lower(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(x,':',' '),'~',' '),'!',' '),'@',' '),'#',' '),'$',' '),'%',' '),'*',' '),'(',' '),')',' '),'_',' '),'+',' '),'{',' '),'}',' '),'[',' '),']',' '),';',' '),' '' ',' '),':',' '),'"',' '),'<',' '),'>',' '),'?',' ')),' ','<>'),'><',''),'<>',' ');
RETURNS VARCHAR(255) DETERMINISTIC
RETURN replace(replace(replace(lower(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(x,':',' '),'~',' '),'!',' '),'@',' '),'#',' '),'$',' '),'%',' '),'*',' '),'(',' '),')',' '),'_',' '),'+',' '),'{',' '),'}',' '),'[',' '),']',' '),';',' '),' '' ',' '),':',' '),'"',' '),'<',' '),'>',' '),'?',' ')),' ','<>'),'><',''),'<>',' ');
Thursday, January 5, 2012
clean text data with regex in SQL Server
CREATE FUNCTION dbo.tokenizer
(
@string varchar(8000)
)
returns varchar(8000) AS
BEGIN
DECLARE @handle int, @result varchar(8000)
exec sp_oacreate 'vbscript.regexp', @handle output
exec sp_oasetproperty @handle, 'pattern', '([{}\(\)\^$&._%#!@=<>:;,~`\’\*\?\/\+\|\[\\\\]|\]|\-)'
exec sp_oasetproperty @handle, 'global', 'true'
exec sp_oasetproperty @handle, 'ignorecase', 0
exec sp_oamethod @handle, 'replace', @result output, @string, ' '
exec sp_oadestroy @handle
RETURN rtrim(ltrim(lower(replace(replace(replace(@result,' ','<>'),'><',''),'<>',' '))))
END
(
@string varchar(8000)
)
returns varchar(8000) AS
BEGIN
DECLARE @handle int, @result varchar(8000)
exec sp_oacreate 'vbscript.regexp', @handle output
exec sp_oasetproperty @handle, 'pattern', '([{}\(\)\^$&._%#!@=<>:;,~`\’\*\?\/\+\|\[\\\\]|\]|\-)'
exec sp_oasetproperty @handle, 'global', 'true'
exec sp_oasetproperty @handle, 'ignorecase', 0
exec sp_oamethod @handle, 'replace', @result output, @string, ' '
exec sp_oadestroy @handle
RETURN rtrim(ltrim(lower(replace(replace(replace(@result,' ','<>'),'><',''),'<>',' '))))
END
Subscribe to:
Posts (Atom)