import paramiko
import threading
import time
import os
def get_remote_file_size(ssh_info, remote_path):
    """Return the size in bytes of a file on the remote SSH host.

    Opens a dedicated SSH/SFTP session, stats ``remote_path`` and closes the
    session again, also when the connection or the stat call fails.

    Args:
        ssh_info: Keyword arguments forwarded to ``paramiko.SSHClient.connect``.
        remote_path: Path of the file on the remote host.

    Returns:
        The ``st_size`` value reported by the SFTP ``stat`` call.
    """
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without checking them.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(**ssh_info)
        sftp = ssh.open_sftp()
        try:
            remote_file_size = sftp.stat(remote_path).st_size
        finally:
            sftp.close()
    finally:
        # Runs even if connect()/open_sftp()/stat() raised, so no session leaks.
        ssh.close()
    print("remote_file_size:{}".format(remote_file_size))
    return remote_file_size
def download_chunk_file(ssh_info, remote_path, local_path, start_pos, end_pos):
    """Copy one byte range of a remote file into the same offsets of a local file.

    The local file must already exist (it is opened with ``"r+b"``). Only the
    bytes ``start_pos`` .. ``end_pos`` are transferred; the transfer also stops
    if the remote file ends earlier.

    Args:
        ssh_info: Keyword arguments forwarded to ``paramiko.SSHClient.connect``.
        remote_path: Path of the file on the remote host.
        local_path: Path of the pre-created local file to write into.
        start_pos: Offset of the first byte to transfer.
        end_pos: Offset of the last byte to transfer. It is treated as
            inclusive so that a caller passing the index of the final byte of
            the file still receives that byte; a range running past the end of
            the file simply ends at EOF.
    """
    print("download_chunk_file start")
    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without checking them.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(**ssh_info)
        sftp = client.open_sftp()
        try:
            with open(local_path, "r+b") as local_file:
                remote_file = sftp.open(remote_path, "rb")
                try:
                    # Position both files at the start of this chunk.
                    local_file.seek(start_pos)
                    remote_file.seek(start_pos)
                    # Count down the bytes still owed to this chunk. Without
                    # this bound the loop would keep reading until remote EOF
                    # and re-download every chunk that follows this one.
                    remaining = end_pos - start_pos + 1
                    while remaining > 0:
                        read_start = time.time()
                        buffer = remote_file.read(remaining)
                        if not buffer:
                            # Remote file ended before end_pos was reached.
                            break
                        print("read cost time {:.2f}s".format(time.time() - read_start))
                        write_start = time.time()
                        local_file.write(buffer)
                        print("write cost time {:.2f}s".format(time.time() - write_start))
                        remaining -= len(buffer)
                finally:
                    remote_file.close()
        finally:
            sftp.close()
    finally:
        # Always release the SSH session, including on errors.
        client.close()
    print("chunk file with start_pos ~ end_pos: {} ~ {}, Download successfully!".format(start_pos, end_pos))
    print("download_chunk_file end")
def download_multithreading(ssh_info, remote_path, local_path, num_threads=None):
    """Download a remote file over SFTP using several threads, one per byte range.

    The local file is first created with the same size as the remote file, then
    each thread runs ``download_chunk_file`` for its own range, and this
    function waits for all of them to finish.

    Args:
        ssh_info: Keyword arguments forwarded to ``paramiko.SSHClient.connect``.
        remote_path: Path of the file on the remote host.
        local_path: Path of the local file to create and fill.
        num_threads: Number of download threads. Defaults to twice the CPU
            count reported by ``os.cpu_count()``.

    Raises:
        ValueError: If ``num_threads`` is given and is smaller than 1.
    """
    cpu_count = os.cpu_count()
    if num_threads is None:
        # os.cpu_count() returns None when the count cannot be determined.
        num_threads = (cpu_count or 1) * 2
    elif num_threads < 1:
        raise ValueError("num_threads must be at least 1")
    print("number of CPU is {}, number of thread is {}".format(cpu_count, num_threads))

    file_size = get_remote_file_size(ssh_info, remote_path)

    # Create the local file with its final size so every thread can seek into it.
    with open(local_path, "wb") as f:
        f.truncate(file_size)
    if file_size == 0:
        # Nothing to transfer; the empty local file is already complete.
        return

    # Never start more threads than there are bytes, otherwise chunk_size
    # would be 0 and most threads would have nothing to download.
    num_threads = min(num_threads, file_size)
    chunk_size = file_size // num_threads
    print("chunk_size is {}".format(chunk_size))

    threads = []
    for index in range(num_threads):
        start_pos = index * chunk_size
        if index == num_threads - 1:
            # The last thread also takes the remainder. The range ends at
            # file_size (one past the final byte) so the final byte is
            # transferred whether the worker reads end_pos as inclusive or
            # exclusive.
            end_pos = file_size
        else:
            end_pos = start_pos + chunk_size
        args = (ssh_info, remote_path, local_path, start_pos, end_pos)
        thread = threading.Thread(target=download_chunk_file, args=args)
        thread.start()
        print(thread)
        threads.append(thread)

    # NOTE(review): an exception raised inside a worker thread is not
    # propagated to this function; join() returns normally in that case.
    for thread in threads:
        thread.join()
        print(thread)
def main():
    """Run one multi-threaded download with fixed settings and report the elapsed time."""
    # Source and destination of the transfer.
    remote_path = '/remote_dir/remote_file'
    local_path = '/local_dir/local_file'

    # Connection settings, passed through to the SSH client as keyword arguments.
    ssh_info = {
        "hostname": "host",
        "port": 22,
        "username": "username",
        "password": "password",
    }

    started_at = time.time()
    download_multithreading(ssh_info, remote_path, local_path)
    elapsed = time.time() - started_at
    print("Full file Download successfully! Cost time: {:.2f}s".format(elapsed))


# Only start the download when the file is executed as a script.
if __name__ == "__main__":
    main()
$ python multi_thread_download_single_bigfile_def.py
number of CPU is 4, number of thread is 8
remote_file_size:63376366
chunk_size is 7922045
download_chunk_file start
download_chunk_file start
download_chunk_file start
download_chunk_file start
download_chunk_file start
download_chunk_file start
download_chunk_file start
download_chunk_file start
read cost time 3.84s
write cost time 0.01s
read cost time 3.94s
read cost time 4.00s
write cost time 0.02s
write cost time 0.02s
read cost time 3.88s
write cost time 0.01s
read cost time 3.96s
write cost time 0.01s
read cost time 4.01s
read cost time 4.00s
write cost time 0.01s
write cost time 0.02s
read cost time 4.29s
write cost time 0.01s
read cost time 0.01s
write cost time 0.00s
chunk file with start_pos ~ end_pos: 55454315 ~ 63376365, Download successfully!
download_chunk_file end
read cost time 2.95s
write cost time 0.01s
read cost time 2.94s
write cost time 0.00s
read cost time 3.11s
write cost time 0.01s
read cost time 3.06s
write cost time 0.01s
read cost time 3.10s
write cost time 0.01s
read cost time 3.07s
write cost time 0.00s
read cost time 3.07s
write cost time 0.01s
read cost time 0.01s
write cost time 0.00s
chunk file with start_pos ~ end_pos: 47532270 ~ 55454315, Download successfully!
download_chunk_file end
read cost time 2.50s
write cost time 0.00s
read cost time 2.67s
read cost time 2.61s
write cost time 0.01s
write cost time 0.00s
read cost time 0.04s
write cost time 0.00s
chunk file with start_pos ~ end_pos: 39610225 ~ 47532270, Download successfully!
download_chunk_file end
read cost time 2.58s
write cost time 0.00s
read cost time 2.64s
write cost time 0.00s
read cost time 2.57s
write cost time 0.00s
read cost time 2.30s
write cost time 0.01s
read cost time 2.39s
write cost time 0.00s
read cost time 0.00s
write cost time 0.00s
chunk file with start_pos ~ end_pos: 31688180 ~ 39610225, Download successfully!
download_chunk_file end
read cost time 2.28s
write cost time 0.00s
read cost time 2.29s
write cost time 0.00s
read cost time 2.37s
write cost time 0.00s
read cost time 2.20s
write cost time 0.00s
read cost time 0.00s
write cost time 0.00s
chunk file with start_pos ~ end_pos: 23766135 ~ 31688180, Download successfully!
download_chunk_file end
read cost time 2.18s
write cost time 0.00s
read cost time 2.18s
write cost time 0.00s
read cost time 2.30s
write cost time 0.00s
read cost time 2.18s
write cost time 0.02s
read cost time 2.13s
write cost time 0.00s
read cost time 0.04s
write cost time 0.00s
chunk file with start_pos ~ end_pos: 15844090 ~ 23766135, Download successfully!
download_chunk_file end
read cost time 2.24s
write cost time 0.00s
read cost time 2.05s
write cost time 0.00s
read cost time 2.06s
read cost time 2.07s
write cost time 6.32s
read cost time 0.00s
write cost time 0.00s
write cost time 4.46s
chunk file with start_pos ~ end_pos: 7922045 ~ 15844090, Download successfully!
read cost time 0.00s
write cost time 0.00s
download_chunk_file end
chunk file with start_pos ~ end_pos: 0 ~ 7922045, Download successfully!
download_chunk_file end
Full file Download successfully! Cost time: 25.37s