-
Notifications
You must be signed in to change notification settings - Fork 214
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
38 changed files
with
432 additions
and
108 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import os | ||
import openpyxl | ||
import xlrd | ||
import datetime | ||
import time | ||
|
||
|
||
def change_datatype(row_data: list):
    """Convert every cell value of one spreadsheet row to text.

    Args:
        row_data: The cell values of a single row, as a list.

    Returns:
        A new list with one string per input value: ``datetime`` values are
        formatted as ``YYYY-MM-DD HH:MM:SS``, ``None`` becomes an empty
        string, strings are passed through unchanged and every other value
        is converted with ``str()``.
    """
    result_data = []
    for cell_value in row_data:
        if cell_value is None:
            # The previous check ``type(rd) is None`` could never be true, so
            # empty cells ended up as the literal text "None".
            text = ''
        elif isinstance(cell_value, datetime.datetime):
            text = cell_value.strftime("%Y-%m-%d %H:%M:%S")
        elif isinstance(cell_value, str):
            text = cell_value
        else:
            # Covers int, float and any other cell type.
            text = str(cell_value)
        result_data.append(text)
    return result_data
|
||
|
||
def find_key(search_key: str, row_content: str):
    """Report whether the keyword occurs in the text of a row.

    Args:
        search_key: The keyword to look for.
        row_content: The row content to search in.

    Returns:
        ``True`` if ``search_key`` is contained in ``row_content``,
        otherwise ``False``.
    """
    # The membership test already yields a bool; no if/else wrapper needed.
    return search_key in row_content
|
||
|
||
def process_xls(path, file):
    """Yield the non-blank rows of every sheet in an ``.xls`` workbook.

    Args:
        path: Directory that contains the workbook.
        file: File name of the workbook inside ``path``.

    Yields:
        ``(filepath, sheet_name, row_index, row_text)`` tuples.  ``row_index``
        is the zero-based row number and ``row_text`` is the row's cell
        values converted to text and joined with single spaces.

    A workbook that cannot be opened is skipped: the generator yields nothing.
    Within each sheet, up to 10 blank rows are tolerated; the next blank row
    ends the scan of that sheet.
    """
    filepath = os.path.join(path, file)
    try:
        rb = xlrd.open_workbook(filepath, formatting_info=True)
    except Exception:
        # Best effort: unreadable files are skipped.  ``Exception`` instead
        # of a bare ``except`` so Ctrl-C and generator shutdown still work.
        return
    for ws_name in rb.sheet_names():
        ws = rb.sheet_by_name(ws_name)
        cols = ws.ncols
        # Counted per sheet; a shared counter let blank rows in one sheet
        # cut the following sheets short.
        blank_rows = 0
        for r in range(ws.nrows):
            cells = change_datatype([ws.cell(r, c).value for c in range(cols)])
            values = " ".join(cells)
            # Joining several empty cells gives a string of spaces, which is
            # truthy, so blankness has to be tested on the stripped text.
            if values.strip():
                yield filepath, ws_name, r, values  # file path, sheet name, row index, row text
            elif blank_rows < 10:
                blank_rows += 1
            else:
                break
|
||
|
||
def process_xlsx(path, file):
    """Yield the non-blank rows of every sheet in an ``.xlsx`` workbook.

    Args:
        path: Directory that contains the workbook.
        file: File name of the workbook inside ``path``.

    Yields:
        ``(filepath, sheet_name, row_index, row_text)`` tuples.  ``row_index``
        is the zero-based position of the row in the sheet's row iterator and
        ``row_text`` is the row's non-empty cell values converted to text and
        joined with single spaces.

    A workbook that cannot be opened is skipped: the generator yields nothing.
    Within each sheet, up to 10 blank rows are tolerated; the next blank row
    ends the scan of that sheet.
    """
    filepath = os.path.join(path, file)
    try:
        wb = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
    except Exception:
        # Best effort: unreadable files are skipped.  ``Exception`` instead
        # of a bare ``except`` so Ctrl-C and generator shutdown still work.
        return
    try:
        for ws_name in wb.sheetnames:
            ws = wb[ws_name]
            # Counted per sheet; a shared counter let blank rows in one sheet
            # cut the following sheets short.
            blank_rows = 0
            for index, row in enumerate(ws.rows):
                present = [cell.value for cell in row if cell.value is not None]
                values = " ".join(change_datatype(present))
                # Whitespace-only rows count as blank too.
                if values.strip():
                    yield filepath, ws_name, index, values  # file path, sheet name, row index, row text
                elif blank_rows < 10:
                    blank_rows += 1
                else:
                    break
    finally:
        # A read-only workbook keeps the file open until closed explicitly.
        wb.close()
|
||
|
||
def find_excel_data(search_key: str, target_dir: str):
    """Search every Excel file below a directory for rows containing a keyword.

    Args:
        search_key: The keyword to search for.
        target_dir: The directory to walk recursively.

    Yields:
        ``(filepath, sheet_name, row_index, row_text)`` for each row whose
        text contains ``search_key``.  Within a directory all ``.xls`` files
        are processed before the ``.xlsx`` files.
    """
    for path, _dirs, files in os.walk(target_dir):
        # Skip the "~$..." files that exist while a workbook is open.
        candidates = [name for name in files if not name.startswith('~$')]
        for suffix in ('.xls', '.xlsx'):
            for name in candidates:
                # Compare case-insensitively so "REPORT.XLS" is found too.
                if not name.lower().endswith(suffix):
                    continue
                reader = process_xls if suffix == '.xls' else process_xlsx
                for filepath, ws_name, index, values in reader(path, name):
                    if find_key(search_key, values):
                        yield filepath, ws_name, index, values  # path/file name, sheet name, row index, row text
|
||
|
||
if __name__ == '__main__':
    # Demo run: search the current directory tree for a keyword, print each
    # matching row and report the elapsed wall-clock time.
    started = time.time()
    keyword = '刘家站垦殖场'
    root_dir = './'
    for match in find_excel_data(keyword, root_dir):
        print(list(match))
    elapsed = round(time.time() - started, 2)
    print("\n程序运行结束,停止运行。耗时:{}秒".format(elapsed))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import os | ||
import xlrd, xlwt | ||
import openpyxl | ||
import datetime | ||
|
||
|
||
# | ||
|
||
def generate_xls(filepath: str, worksheet_data: dict):
    """Write grouped rows to a new ``.xls`` workbook, one sheet per group.

    Args:
        filepath: Path of the source workbook; the output is written next to
            it with ``_Split_<timestamp>`` inserted before the extension.
        worksheet_data: Mapping of sheet name to a list of rows, each row
            being a list of cell values.

    Returns:
        The path of the newly written workbook.
    """
    datetime_str = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
    # Touch only the real extension: str.replace('.xls', ...) would also
    # rewrite a ".xls" that appears earlier in the path, and would leave the
    # path unchanged (overwriting the source) for an upper-case ".XLS".
    root, ext = os.path.splitext(filepath)
    new_filepath = '{}_Split_{}{}'.format(root, datetime_str, ext)
    new_workbook = xlwt.Workbook(encoding='utf-8')
    for worksheet_name, row_data_list in worksheet_data.items():
        # Sheet names must be text; numeric group keys would otherwise fail.
        new_worksheet = new_workbook.add_sheet(str(worksheet_name))
        for row_index, row_data in enumerate(row_data_list):
            for column_index, data in enumerate(row_data):
                new_worksheet.write(row_index, column_index, data)
    new_workbook.save(new_filepath)
    return new_filepath
|
||
|
||
def process_xls(filepath, column: int, worksheet_name: str = None):
    """Split one sheet of an ``.xls`` workbook into groups by a column's value.

    Args:
        filepath: Path of the ``.xls`` workbook to read.
        column: One-based number of the column whose value decides the group.
        worksheet_name: Sheet to read; the first sheet is used when omitted.

    Returns:
        A status message: a read-error message, a column-out-of-range
        message, or the name of the new file the grouped rows were saved to.
    """
    try:
        workbook = xlrd.open_workbook(filepath, formatting_info=True)
    except Exception:
        # Report unreadable files as a message, but let Ctrl-C through.
        return "文件读取异常:{}".format(filepath)
    if worksheet_name:
        worksheet = workbook.sheet_by_name(worksheet_name)
    else:
        worksheet = workbook.sheet_by_index(0)
    rows = worksheet.nrows
    cols = worksheet.ncols
    # Same guard as the .xlsx reader; without it an out-of-range column
    # raised IndexError and column 0 silently picked the last column.
    if column < 1 or cols < column:
        return "最大列数是{},取不到第{}列".format(cols, column)
    split_data_dict = {}
    for r in range(rows):
        row_data = []
        for c in range(cols):
            value = worksheet.cell(r, c).value
            # Only truly empty cells become a space; a plain truthiness test
            # would also wipe out legitimate values such as 0.
            row_data.append(' ' if value is None or value == '' else value)
        # The group key becomes a sheet name, so keep it as text.
        group_key = str(row_data[column - 1])
        split_data_dict.setdefault(group_key, []).append(row_data)
    new_filepath = generate_xls(filepath, split_data_dict)
    return "数据保存在新文件中,文件名:{}".format(new_filepath)
|
||
|
||
def generate_xlsx(filepath: str, worksheet_data: dict):
    """Write grouped rows to a new ``.xlsx`` workbook, one sheet per group.

    Args:
        filepath: Path of the source workbook; the output is written next to
            it with ``_Split_<timestamp>`` inserted before the extension.
        worksheet_data: Mapping of sheet name to a list of rows, each row
            being a list of cell values.

    Returns:
        The path of the newly written workbook.
    """
    datetime_str = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
    # Touch only the real extension: str.replace('.xlsx', ...) would also
    # rewrite a ".xlsx" that appears earlier in the path, and would leave the
    # path unchanged (overwriting the source) for an upper-case ".XLSX".
    root, ext = os.path.splitext(filepath)
    new_filepath = '{}_Split_{}{}'.format(root, datetime_str, ext)
    new_workbook = openpyxl.Workbook()
    if worksheet_data:
        # Workbook() starts with one empty default sheet; drop it so the
        # output holds only the data sheets.  It is kept when there is no
        # data because a workbook needs at least one sheet to be saved.
        new_workbook.remove(new_workbook.active)
    for worksheet_name, row_data_list in worksheet_data.items():
        # Sheet titles must be text; numeric group keys would otherwise fail.
        new_worksheet = new_workbook.create_sheet(str(worksheet_name))
        for row_data in row_data_list:
            new_worksheet.append(row_data)
    new_workbook.save(new_filepath)
    return new_filepath
|
||
|
||
def process_xlsx(filepath: str, column: int, worksheet_name: str = None):
    """Split one sheet of an ``.xlsx`` workbook into groups by a column's value.

    Args:
        filepath: Path of the ``.xlsx`` workbook to read.
        column: One-based number of the column whose value decides the group.
        worksheet_name: Sheet to read; the active sheet is used when omitted.

    Returns:
        A status message: a read-error message, a column-out-of-range
        message, or the name of the new file the grouped rows were saved to.
    """
    try:
        workbook = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
    except Exception:
        # Report unreadable files as a message, but let Ctrl-C through.
        return "文件读取异常:{}".format(filepath)
    try:
        if worksheet_name:
            # Item access replaces the deprecated get_sheet_by_name().
            worksheet = workbook[worksheet_name]
        else:
            worksheet = workbook.active
        # column < 1 would index from the end of the row instead of failing.
        if column < 1 or worksheet.max_column < column:
            return "最大列数是{},取不到第{}列".format(worksheet.max_column, column)

        split_data_dict = {}
        for row in worksheet.rows:
            # Only truly empty cells become a space; a plain truthiness test
            # would also wipe out legitimate values such as 0.
            row_data = [
                ' ' if cell.value is None or cell.value == '' else cell.value
                for cell in row
            ]
            # The group key becomes a sheet title, so keep it as text.
            group_key = str(row_data[column - 1])
            split_data_dict.setdefault(group_key, []).append(row_data)
    finally:
        # A read-only workbook keeps the file open until closed explicitly.
        workbook.close()
    new_filepath = generate_xlsx(filepath, split_data_dict)
    return "数据保存在新文件中,文件名:{}".format(new_filepath)
|
||
|
||
def split_excel(filepath: str, column: int, worksheet_name: str = None):
    """Dispatch a workbook to the reader that matches its file extension.

    Args:
        filepath: Path of the workbook; must end in ``.xlsx`` or ``.xls``.
        column: Column number passed through to the reader.
        worksheet_name: Optional sheet name passed through to the reader.

    Returns:
        The status message produced by the reader, or a fixed message when
        the extension is neither ``.xlsx`` nor ``.xls``.
    """
    if filepath.endswith('.xlsx'):
        return process_xlsx(filepath, column, worksheet_name)
    if filepath.endswith('.xls'):
        return process_xls(filepath, column, worksheet_name)
    return "文件格式不对,不进行处理"
|
||
|
||
if __name__ == "__main__":
    # Demo: split the sample workbook by its sixth column.  Pass
    # worksheet_name to pick a sheet; without it the file's default sheet is
    # read.  Use 'SEdemo.xlsx' instead to exercise the .xlsx code path.
    demo_file = 'sedemo.xls'
    outcome = split_excel(demo_file, 6)
    print(outcome)
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,5 @@ | ||
# pip install python-office -i https://pypi.python.org/simple -U | ||
# pip install python-office -i https://pypi.python.org/simple -U | ||
|
||
# 1、pip freeze > allpackages.txt | ||
# 2、pip uninstall -r allpackages.txt -y | ||
# 3、pip install --upgrade python-office |
Binary file not shown.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
File renamed without changes.
Empty file.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import unittest | ||
|
||
if __name__ == '__main__':
    # Discover every test_*.py module under ./test_unit and run the whole
    # suite, writing the verbose report to test_result.txt.
    discovered = unittest.defaultTestLoader.discover('./test_unit', pattern='test_*.py')
    with open('test_result.txt', 'w+') as report:
        unittest.TextTestRunner(stream=report, verbosity=2).run(discovered)
This file was deleted.
Oops, something went wrong.
Empty file.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file.
Oops, something went wrong.