In [2]:
import time
# medical_file = r'E:\medical.txt'
# legal_file = r'E:\legal.txt'
# Split `process_file` into at most `out_file_count` parts of (nearly) equal
# line count, written to e:/{prefix}_sub_{index}.txt.
out_file_count = 5
prefix = 'IDM_worldblank_text'
process_file = r'D:\WORK\UCP\IDM_worldblank_text2.txt'

# Pass 1: count the lines so the size of each part can be derived.
with open(process_file, 'r', encoding='utf-8', buffering=1024*1024*4) as src:
    line_count = sum(1 for _ in src)

# Ceiling division: the smallest part size that fits every line into
# `out_file_count` files. Clamped to 1 so an empty input still gets a valid size.
count_per_file = max(1, -(-line_count // out_file_count))
print('process_file', process_file, 'line_count', line_count, 'out_file_count', out_file_count, 'count_per_file', count_per_file)

# Pass 2: stream the lines into the part files.
file_ind = 0
line_no = 0  # lines already written to the currently open part
seg_file_fp = open(f'e:/{prefix}_sub_{file_ind}.txt', 'w', encoding='utf-8')
try:
    with open(process_file, 'r', encoding='utf-8', buffering=1024*1024*4) as src:
        for line in src:
            # Roll over *before* writing, and only when another line actually
            # exists: each part then holds exactly `count_per_file` lines and
            # no empty trailing part is created.
            if line_no >= count_per_file:
                seg_file_fp.close()
                file_ind += 1
                line_no = 0
                seg_file_fp = open(f'e:/{prefix}_sub_{file_ind}.txt', 'w', encoding='utf-8')
            # Surrounding whitespace is stripped and a single '\n' re-added.
            seg_file_fp.write(line.strip() + '\n')
            line_no += 1
finally:
    # Close the current part even if reading or writing raised.
    seg_file_fp.close()
print("success!")
process_file D:\WORK\UCP\IDM_worldblank_text2.txt line_count 31087 out_file_count 5 count_per_file 6218 success!
In [ ]:
import shutil
import os
def mkdir(path):
    """Create directory `path` if nothing exists at that location yet.

    Missing parent directories are created as well. When `path` already
    exists the call is a silent no-op. Returns None.
    """
    if not os.path.exists(path):
        # exist_ok covers the case where the directory appears between the
        # existence check above and this call.
        os.makedirs(path, exist_ok=True)
        print("mkdir", path)
# Directories to create; each parent is listed before its children.
mk = [
    r"d:\sci-hub_financial",
    r"d:\sci-hub_financial\financial1",
    r"d:\sci-hub_financial\financial2",
    r"d:\sci-hub",
]
for _dir in mk:
    mkdir(_dir)

# (source, destination) pairs; sources are resolved against the current
# working directory.
copy_jobs = (
    ("sci_hub_download.py", "d:/sci-hub/sci_hub_download.py"),
    ("start_financial1.bat", "d:/sci-hub_financial/start_financial1.bat"),
    ("start_financial2.bat", "d:/sci-hub_financial/start_financial2.bat"),
    ("ftp_upload_helper_financial.py", "d:/sci-hub_financial/ftp_upload_helper_financial.py"),
)
for src_name, dst_path in copy_jobs:
    # shutil.copy returns the path of the file it wrote.
    copied_to = shutil.copy(src_name, dst_path)
    print("copy", copied_to)