1、将windows上文件的格式从UTF-16转为utf-8
[root@Cloud-Server ~]# file 111111.txt
111111.txt: Little-endian UTF-16 Unicode text, with CRLF line terminators
[root@Cloud-Server ~]#
[root@Cloud-Server ~]# iconv -f utf-16 -t utf-8 111111.txt > 222222.txt
[root@Cloud-Server ~]#
[root@Cloud-Server ~]# file 222222.txt
222222.txt: UTF-8 Unicode text, with CRLF line terminators
[root@Cloud-Server ~]#
原始txt 文件格式如下
[root@Cloud-Server ~]# more /root/222222.txt |head -10
102100014904:中国工商银行股份有限公司北京岳各庄支行
102100002003:中国工商银行股份有限公司北京白云路支行
102100000064:中国工商银行股份有限公司北京樱桃园支行
102100000185:中国工商银行股份有限公司北京菜市口支行
102100009795:中国工商银行股份有限公司北京中航油支行
102100001153:中国工商银行股份有限公司北京城关支行
102100001274:中国工商银行股份有限公司北京燕莎支行
102100002484:中国工商银行股份有限公司北京天宁寺支行
102100022099:中国工商银行股份有限公司北京酒仙桥支行
102100023737:中国工商银行股份有限公司北京科技园东区支行
[root@Cloud-Server ~]#
2、使用python3 将txt转为csv文件
import csv
out = open('222222.csv','w',newline='')
csv_writer = csv.writer(out,dialect='excel')
f = open("/root/222222.txt","r")
for line in f.readlines():
line=line.replace(':','\t')
list = line.split()
csv_writer.writerow(list)
# 检查csv文件
# 读取csv文件的前10行
import csv
with open('/root/222222.csv') as csvfile:
reader = csv.reader(csvfile)
for i,row in enumerate(reader):
#print(row['BANKCODE'], row['BANKNAME'])
print(row)
if(i >= 9):
break
# 输出结果如下:
['102100014904', '中国工商银行股份有限公司北京岳各庄支行']
['102100002003', '中国工商银行股份有限公司北京白云路支行']
['102100000064', '中国工商银行股份有限公司北京樱桃园支行']
['102100000185', '中国工商银行股份有限公司北京菜市口支行']
['102100009795', '中国工商银行股份有限公司北京中航油支行']
['102100001153', '中国工商银行股份有限公司北京城关支行']
['102100001274', '中国工商银行股份有限公司北京燕莎支行']
['102100002484', '中国工商银行股份有限公司北京天宁寺支行']
['102100022099', '中国工商银行股份有限公司北京酒仙桥支行']
['102100023737', '中国工商银行股份有限公司北京科技园东区支行']
查看最终生成的csv文件的前10行数据如下:
[root@Cloud-Server ~]# more /home/oracle/222222.csv |head -10
102100014904,中国工商银行股份有限公司北京岳各庄支行
102100002003,中国工商银行股份有限公司北京白云路支行
102100000064,中国工商银行股份有限公司北京樱桃园支行
102100000185,中国工商银行股份有限公司北京菜市口支行
102100009795,中国工商银行股份有限公司北京中航油支行
102100001153,中国工商银行股份有限公司北京城关支行
102100001274,中国工商银行股份有限公司北京燕莎支行
102100002484,中国工商银行股份有限公司北京天宁寺支行
102100022099,中国工商银行股份有限公司北京酒仙桥支行
102100023737,中国工商银行股份有限公司北京科技园东区支行
[root@Cloud-Server ~]#
3、需要将/root/222222.csv 传到数据库服务器上
cp /root/222222.csv /home/oracle/222222.csv
chown oracle:oinstall /home/oracle/222222.csv
4、将远程连接工具的字符集设置为G2312 (好像可以省略,在后期查看数据的时候可能会需要进行设置)
5、设置操作系统的字符集与数据库字符集保持一致(好像可以省略)
export NLS_LANG=AMERICAN_AMERICA.ZHS16GBK
6、创建表
drop table LG_BANKNO_BNB_20220620 purge ;
create table LG_BANKNO_BNB_20220620(BANKCODE number,BANKNAME varchar2(400)) ;
7、创建sqlldr的控制文件
$ mkdir -p /home/oracle/external_tb/data
create or replace directory data_dir as '/home/oracle/';
grant read,write on directory data_dir to public;
8、创建外部表
drop table emp_new purge ;
create table emp_new(
BANKCODE number,
BANKNAME varchar2(400)
)
organization external
(
type oracle_loader
default directory data_dir
access parameters(
records delimited by newline
badfile 'emp_new%a_%p.bad'
logfile 'emp_new%a_%p.log'
fields terminated by ','
optionally enclosed by '"'
lrtrim missing field values are null
reject rows with all null fields
)
location ('222222.csv')
)
parallel
reject limit unlimited;
说明:即使存在不满足要求的数据,也会在创建外部表的时候能正常创建成功。
9、检查数据
select count(*) from emp_new ;
COUNT(*)
----------
126234
set lines 200 pages 200
col BANKCODE for 9999999999999999999
col BANKNAME for a70
select * from emp_new where rownum <=10 ;
BANKCODE BANKNAME
-------------------- ----------------------------------------------------------------------
102100014904 中国工商银行股份有限公司北京岳各庄支行
102100002003 中国工商银行股份有限公司北京白云路支行
102100000064 中国工商银行股份有限公司北京樱桃园支行
102100000185 中国工商银行股份有限公司北京菜市口支行
102100009795 中国工商银行股份有限公司北京中航油支行
102100001153 中国工商银行股份有限公司北京城关支行
102100001274 中国工商银行股份有限公司北京燕莎支行
102100002484 中国工商银行股份有限公司北京天宁寺支行
102100022099 中国工商银行股份有限公司北京酒仙桥支行
102100023737 中国工商银行股份有限公司北京科技园东区支行
10 rows selected.
10、检查日志文件,在/home/oracle/目录下
[oracle@Cloud-Server ~]$
[oracle@Cloud-Server ~]$ ls -lrt |grep emp_new
-rw-r--r-- 1 oracle oinstall 8668743 Jun 20 21:06 222222.txt
-rw-r--r-- 1 oracle oinstall 1356 Jul 2 14:10 emp_new000_22513.log
-rw-r--r-- 1 oracle oinstall 3996 Jul 2 14:10 emp_new000_20990.log
-rw-r--r-- 1 oracle oinstall 1728 Jul 2 14:10 emp_new000_20990.bad
查看未导入的数据,一定要在加载完外部文件中所有数据后再检查,否则可能由于没有读取到存在问题的数据而未生产bad文件,导致结果异常。
[oracle@Cloud-Server ~]$ more emp_new000_20990.bad
java.net.ConnectException: Connection timed out: connect
at java.net.DualStackPlainSocketImpl.connect0(Native Method)
at java.net.DualStackPlainSocketImpl.socketConnect(DualStackPlainSocketImpl.java:79)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:172)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:589)
at java.net.Socket.connect(Socket.java:538)
at sun.net.NetworkClient.doConnect(NetworkClient.java:180)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:463)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:558)
at sun.net.www.http.HttpClient.(HttpClient.java:242)
at sun.net.www.http.HttpClient.New(HttpClient.java:339)
at sun.net.www.http.HttpClient.New(HttpClient.java:357)
at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:1220)
at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1156)
at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1050)
at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:984)
at com.system.common.util.SHbank.SHNSBankUtil.hiscomm(SHNSBankUtil.java:149)
at com.system.common.util.SHbank.SHNSBankUtil.getBankList(SHNSBankUtil.java:573)
at com.system.common.util.SHbank.SHNSBankUtil.download(SHNSBankUtil.java:592)
at com.system.common.util.SHbank.SHNSBankUtil.main(SHNSBankUtil.java:966)
[oracle@Cloud-Server ~]$
[oracle@Cloud-Server ~]$ more emp_new000_22513.log
LOG file opened at 07/02/22 14:09:59
KUP-05008: Warning: Intra source concurrency disabled because the input data is in DELIMITED record format and the characterset is neither fixed width nor UTF8.
Field Definitions for table EMP_NEW
Record format DELIMITED BY NEWLINE
Data in file has same endianness as the platform
Reject rows with all null fields
Fields in Data Source:
BANKCODE CHAR (255)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
BANKNAME CHAR (400)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
LOG file opened at 07/02/22 14:10:38
KUP-05008: Warning: Intra source concurrency disabled because the input data is in DELIMITED record format and the characterset is neither fixed width nor UTF8.
Field Definitions for table EMP_NEW
Record format DELIMITED BY NEWLINE
Data in file has same endianness as the platform
Reject rows with all null fields
Fields in Data Source:
BANKCODE CHAR (255)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
BANKNAME CHAR (400)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
[oracle@Cloud-Server ~]$
[oracle@Cloud-Server ~]$ more emp_new000_20990.log
LOG file opened at 07/02/22 14:09:59
KUP-05008: Warning: Intra source concurrency disabled because the input data is in DELIMITED record format and the characterset is neither fixed width nor UTF8.
Field Definitions for table EMP_NEW
Record format DELIMITED BY NEWLINE
Data in file has same endianness as the platform
Reject rows with all null fields
Fields in Data Source:
BANKCODE CHAR (255)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
BANKNAME CHAR (400)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
LOG file opened at 07/02/22 14:10:38
KUP-05008: Warning: Intra source concurrency disabled because the input data is in DELIMITED record format and the characterset is neither fixed width nor UTF8.
Field Definitions for table EMP_NEW
Record format DELIMITED BY NEWLINE
Data in file has same endianness as the platform
Reject rows with all null fields
Fields in Data Source:
BANKCODE CHAR (255)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
BANKNAME CHAR (400)
Terminated by ":"
Enclosed by """ and """
Trim whitespace from left and right
error processing column BANKCODE in row 125778 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125779 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125780 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125781 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125782 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125783 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125784 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125785 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125786 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125787 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125788 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125789 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125790 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125791 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125792 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125793 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125794 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125795 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125796 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125797 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125798 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125799 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125800 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
error processing column BANKCODE in row 125801 for datafile /home/oracle/222222.txt
ORA-01722: invalid number
[oracle@Cloud-Server ~]$