Friday, January 13, 2012

Excel URL to HOST

Excel URL to HOST. If the Host is of "www", it will be reduced to root domain.

=IF(LEFT(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),4)="www.",MID(LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1),5,256),LEFT(SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://",""),FIND("/",SUBSTITUTE(SUBSTITUTE(A1,"https:","http:"),"http://","")&"/")-1))

Wednesday, January 11, 2012

clean text data with regex in MySQL

CREATE FUNCTION tokenizer(x VARCHAR(255))
RETURNS VARCHAR(255) DETERMINISTIC
RETURN replace(replace(replace(lower(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(x,':',' '),'~',' '),'!',' '),'@',' '),'#',' '),'$',' '),'%',' '),'*',' '),'(',' '),')',' '),'_',' '),'+',' '),'{',' '),'}',' '),'[',' '),']',' '),';',' '),' '' ',' '),':',' '),'"',' '),'<',' '),'>',' '),'?',' ')),' ','<>'),'><',''),'<>',' ');

Thursday, January 5, 2012

clean text data with regex in SQL Server

CREATE FUNCTION dbo.tokenizer
(
    @string varchar(8000)
)
returns varchar(8000) AS
BEGIN
    DECLARE @handle int, @result varchar(8000)


    exec sp_oacreate 'vbscript.regexp', @handle output
    exec sp_oasetproperty @handle, 'pattern', '([{}\(\)\^$&._%#!@=<>:;,~`\’\*\?\/\+\|\[\\\\]|\]|\-)'
    exec sp_oasetproperty @handle, 'global', 'true'
    exec sp_oasetproperty @handle, 'ignorecase', 0
    exec sp_oamethod @handle, 'replace', @result output, @string, ' '
    exec sp_oadestroy @handle

    RETURN rtrim(ltrim(lower(replace(replace(replace(@result,' ','<>'),'><',''),'<>',' '))))
END