CREATE
OR
REPLACE
PROCEDURE
"SP_GET_ARTICLE_DATA"
(article_detail_id_$
number
,
Record_Md5_ID_$
varchar2
,
content_text_$ clob,
ReturnValue out
number
)
is
begin
declare
--
自定义数组
TYPE StringArray
IS
table
OF
VARCHAR2
(
1000
);
Offset_URL
number
;
Offset_Email
number
;
Offset_Phone
number
;
Offset_Account
number
;
--
数据下标
URL_V_Num
number
:
=
1
;
Email_V_Num
number
:
=
1
;
Phone_V_Num
number
:
=
1
;
Account_V_Num
number
:
=
1
;
--
数据元素存在标志
Is_Exsit_Flag
number
;
URL
varchar2
(
3000
);
Email
varchar2
(
200
);
Phone
varchar2
(
100
);
Bank_Account
varchar2
(
100
);
URL_Array StringArray:
=
StringArray();
Email_Array StringArray:
=
StringArray();
Phone_Array StringArray:
=
StringArray();
Account_Array StringArray:
=
StringArray();
--
常量
URL_RegExp constant
varchar2
(
300
):
=
'
(((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)|(www\.))+(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(/[a-zA-Z0-9\&
'
||
'
%_\./-~-]*)?
'
;
Email_RegExp constant
varchar2
(
200
):
=
'
[A-Za-z0-9_]+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*
'
;
Phone_RegExp constant
varchar2
(
300
):
=
'
[+[:digit:]]{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{4}|[+[:digit:]]?[-\.,[:blank:]]*\d{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]+\d{4}|\d{3}[-\.,[:blank:]]+\d{4}
'
;
Bank_Account_RegExp constant
varchar2
(
200
):
=
'
(SA)?[[:digit:]]{22}|(SA)?[[:blank:]]*([[:digit:]]{4}|\d{2})[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*(\d{4}|\d{2})
'
;
URL_Escape constant
varchar2
(
100
):
=
'
\.jpg|\.rar|\.zip|\.png|\.gif|\.pdf|\.doc|\.excel|\.swf|\.mpg|\.\.|\.rmvb
'
;
DUP_VAL_INDEX exception;
pragma exception_init(DUP_VAL_INDEX,
-
0001
);
begin
if
article_detail_id_$
is
not
null
and
article_detail_id_$
<>
0
then
Offset_URL:
=
1
;
Offset_Email:
=
1
;
Offset_Phone:
=
1
;
Offset_Account:
=
1
;
while
Offset_URL
<>
0
or
Offset_Email
<>
0
or
Offset_Phone
<>
0
or
Offset_Account
<>
0
loop
--
获取URL
if
Offset_URL
<>
0
then
select
trim(to_char(REGEXP_SUBSTR(content_text_$,URL_RegExp, Offset_URL,
1
,
'
i
'
)))
into
URL
from
dual;
--
获取从offset位置开始的第一个符合URL表达式的字符串位置,并赋给offset
select
REGEXP_INSTR(content_text_$, URL_RegExp, Offset_URL,
1
,
1
,
'
i
'
)
into
Offset_URL
from
dual;
end
if
;
--
获取Email
if
Offset_Email
<>
0
then
select
trim(to_char(REGEXP_SUBSTR(content_text_$,Email_RegExp, Offset_Email,
1
,
'
i
'
)))
into
Email
from
dual;
--
获取从offset位置开始的第一个符合Email表达式的字符串位置,并赋给offset
select
REGEXP_INSTR(content_text_$, Email_RegExp, Offset_Email,
1
,
1
,
'
i
'
)
into
Offset_Email
from
dual;
end
if
;
--
获取Phone
if
Offset_Phone
<>
0
then
select
trim(to_char(REGEXP_SUBSTR(content_text_$,Phone_RegExp, Offset_Phone,
1
,
'
i
'
)))
into
Phone
from
dual;
--
获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
select
REGEXP_INSTR(content_text_$, Phone_RegExp, Offset_Phone,
1
,
1
,
'
i
'
)
into
Offset_Phone
from
dual;
end
if
;
--
获取Bank_Account
if
Offset_Account
<>
0
then
select
trim(to_char(REGEXP_SUBSTR(content_text_$,Bank_Account_RegExp, Offset_Account,
1
,
'
i
'
)))
into
Bank_Account
from
dual;
--
获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
select
REGEXP_INSTR(content_text_$, Bank_Account_RegExp, Offset_Account,
1
,
1
,
'
i
'
)
into
Offset_Account
from
dual;
end
if
;
--
URL插入到数组中
if
(length(URL)
>
0
and
length(URL)
<
800
)
and
REGEXP_INSTR(URL, URL_Escape,
1
,
1
,
0
,
'
i
'
)
=
0
then
Is_Exsit_Flag :
=
0
;
FOR
I
IN
1
..URL_Array.
COUNT
LOOP
if
URL
=
URL_Array(I)
then
Is_Exsit_Flag:
=
1
;
end
if
;
END
LOOP;
--
如果数组中不存在当前值,则插入到数组
if
Is_Exsit_Flag
=
0
then
URL_Array.EXTEND(
1
);
URL_Array(URL_V_Num):
=
URL;
URL_V_Num:
=
URL_V_Num
+
1
;
end
if
;
end
if
;
--
Email插入到数组中
if
(length(Email)
>
0
and
length(Email)
<
100
)
then
Is_Exsit_Flag :
=
0
;
FOR
I
IN
1
..Email_Array.
COUNT
LOOP
if
Email
=
Email_Array(I)
then
Is_Exsit_Flag:
=
1
;
end
if
;
END
LOOP;
--
如果数组中不存在当前值,则插入到数组
if
Is_Exsit_Flag
=
0
then
Email_Array.EXTEND(
1
);
Email_Array(Email_V_Num):
=
Email;
Email_V_Num:
=
Email_V_Num
+
1
;
end
if
;
end
if
;
--
Phone插入到数组中
if
(length(Phone)
>
6
and
length(Phone)
<
100
)
then
Is_Exsit_Flag :
=
0
;
FOR
I
IN
1
..Phone_Array.
COUNT
LOOP
if
Phone
=
Phone_Array(I)
then
Is_Exsit_Flag:
=
1
;
end
if
;
END
LOOP;
--
如果数组中不存在当前值,则插入到数组
if
Is_Exsit_Flag
=
0
then
Phone_Array.EXTEND(
1
);
Phone_Array(Phone_V_Num):
=
Phone;
Phone_V_Num:
=
Phone_V_Num
+
1
;
end
if
;
end
if
;
--
Bank_Account插入到数组中
if
(length(Bank_Account)
>
0
and
length(Bank_Account)
<
100
)
then
Is_Exsit_Flag :
=
0
;
FOR
I
IN
1
..Account_Array.
COUNT
LOOP
if
Bank_Account
=
Account_Array(I)
then
Is_Exsit_Flag:
=
1
;
end
if
;
END
LOOP;
--
如果数组中不存在当前值,则插入到数组
if
Is_Exsit_Flag
=
0
then
Account_Array.EXTEND(
1
);
Account_Array(Account_V_Num):
=
Bank_Account;
Account_V_Num:
=
Account_V_Num
+
1
;
end
if
;
end
if
;
end
loop;
--
Dbms_Output.put_line('Array is:'||URL_Array(1));
--
插入获取到的URL
FOR
I
IN
1
..URL_Array.
COUNT
LOOP
insert
into
article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values
(article_detail_id_$,Record_Md5_ID_$,
'
URL
'
,
'
1
'
,URL_Array(I));
END
LOOP;
--
插入获取到的Email
FOR
I
IN
1
..Email_Array.
COUNT
LOOP
insert
into
article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values
(article_detail_id_$,Record_Md5_ID_$,
'
Email
'
,
'
0
'
,Email_Array(I));
END
LOOP;
--
插入获取到的Phone
FOR
I
IN
1
..Phone_Array.
COUNT
LOOP
insert
into
article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values
(article_detail_id_$,Record_Md5_ID_$,
'
Phone
'
,
'
0
'
,Phone_Array(I));
END
LOOP;
--
插入获取到的Bank_Account
FOR
I
IN
1
..Account_Array.
COUNT
LOOP
insert
into
article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
values
(article_detail_id_$,Record_Md5_ID_$,
'
Bank Account
'
,
'
0
'
,Account_Array(I));
END
LOOP;
end
if
;
Exception
when
NO_DATA_FOUND
then
ReturnValue:
=-
1000
;
when
DUP_VAL_INDEX
then
ReturnValue:
=-
1001
;
when
VALUE_ERROR
then
ReturnValue:
=-
1002
;
when
TOO_MANY_ROWS
then
ReturnValue:
=-
1003
;
when
OTHERS
then
ReturnValue:
=-
1004
;
--
Dbms_Output.put_line('The SQLCode is: '||SQLCODE);
--
Dbms_Output.put_line('The SQLERRM is: '||SQLERRM);
end
;
end
SP_GET_ARTICLE_DATA;