|
@@ -1,7 +1,7 @@
|
|
|
import uvicorn
|
|
|
import warnings
|
|
|
import os
|
|
|
-from fastapi import FastAPI, UploadFile, File
|
|
|
+from fastapi import FastAPI, UploadFile, File, BackgroundTasks, routing
|
|
|
from openpyxl import load_workbook
|
|
|
from openpyxl.utils.cell import coordinate_from_string
|
|
|
from openpyxl.comments import Comment
|
|
@@ -10,6 +10,12 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
|
from datetime import datetime
|
|
|
from fastapi.responses import FileResponse
|
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
+import asyncio
|
|
|
+from concurrent.futures.process import ProcessPoolExecutor
|
|
|
+from fastapi.responses import StreamingResponse
|
|
|
+import shutil
|
|
|
+import uuid
|
|
|
+import time
|
|
|
|
|
|
warnings.filterwarnings("ignore")
|
|
|
|
|
@@ -24,18 +30,211 @@ app.add_middleware(
|
|
|
allow_headers=["*"],
|
|
|
)
|
|
|
|
|
|
-shared_dir = 'cache'
|
|
|
-app.mount(f"/{shared_dir}", StaticFiles(directory="download_cache"), name={shared_dir})
|
|
|
# Directory whose contents are exposed for download under "/download_cache".
shared_dir = "download_cache"
# Directory whose contents are exposed for download under "/diff_cache".
diff_dir = "diff_cache"
# Scratch directory (note the trailing slash: callers concatenate file names
# directly onto this value).
cur_cache_path = "cur_cache/"

# Create the directories up front: mounting StaticFiles on a missing
# directory fails at import time, and writes into the scratch directory
# would fail later.
for _cache_dir in (shared_dir, diff_dir, cur_cache_path):
    os.makedirs(_cache_dir, exist_ok=True)

# `name` must be a plain string. The previous `name={shared_dir}` spelling
# built a one-element set, which is not a usable route name.
app.mount(f"/{shared_dir}", StaticFiles(directory=shared_dir), name=shared_dir)
app.mount(f"/{diff_dir}", StaticFiles(directory=diff_dir), name=diff_dir)
|
|
|
|
|
|
-@app.post("/uploadfile")
|
|
|
-async def create_upload_file(file: UploadFile = File(...)):
|
|
|
+
|
|
|
def get_title_row(sheet):
    """Locate the header row inside the slice ``sheet[1:5]``.

    Every cell of that slice is printed for tracing and compared with the
    known header captions. The scan never stops early, so when several cells
    match, the row of the last matching cell is the one reported.

    Returns:
        The ``row`` attribute of the last matching cell, or -1 when no cell
        carries one of the captions.
    """
    header_captions = ("证件号码", "收入(元)", "务工月收入")
    found_row = -1
    for row_label, cells in enumerate(sheet[1:5], start=1):
        for col_index, cell in enumerate(cells):
            print(f"第{row_label}行,第{col_index}列,值:{cell.value}")
            if any(caption == cell.value for caption in header_captions):
                found_row = cell.row
    return found_row
|
|
|
+
|
|
|
+
|
|
|
def get_all_numbers(sheet, start_row, cow):
    """Collect the values of one column into a ``value -> row number`` dict.

    Args:
        sheet: Worksheet-like object indexable by a coordinate string such as
            ``"B7"`` and exposing ``max_row``.
        start_row: First row number to read.
        cow: Column letter to read from.

    Returns:
        Dict mapping each cell value to the row it was read from. Reading
        stops at the first cell whose value is ``None`` or just before row
        ``sheet.max_row * 2`` (evaluated once, before the scan), whichever
        comes first. A repeated value keeps its last row.
    """
    row_by_value = {}
    stop_row = sheet.max_row * 2
    row = start_row
    while row < stop_row:
        value = sheet[f"{cow}{row}"].value
        if value is None:
            break
        row_by_value[value] = row
        row += 1
    return row_by_value
|
|
|
+
|
|
|
+
|
|
|
def _title_columns(sheet, title_row_num):
    """Map every caption found in row ``title_row_num`` to its column letter."""
    columns = {}
    for title_cell in sheet[title_row_num]:
        column_letter, _ = coordinate_from_string(title_cell.coordinate)
        columns[title_cell.value] = column_letter
    return columns


def deal_diff_data(file: UploadFile = File(...), target_name: str = None):
    """Compare the "省办务工" and "国办务工" sheets of an uploaded workbook.

    The upload is written into the scratch directory and loaded with
    openpyxl. For each of the two sheets, a new sheet is appended holding the
    header row plus every row whose "证件号码" value is absent from the other
    sheet. The workbook is then saved and moved to ``target_name``.

    Args:
        file: Object exposing ``filename`` and a readable binary ``file``.
        target_name: Destination path for the finished workbook.

    Returns:
        ``None`` on success. A ``{"code": 202, "msg": ...}`` dict when a
        required sheet or its header row cannot be found. The visible caller
        schedules this function as a background task, so presumably nobody
        reads that value.

    Raises:
        KeyError: If a detected header row has no "证件号码" column.
    """
    print("开始处理")

    def generate_diff_data(start_row, max_row, sheet, title_dict, keys, need_copy_data):
        """Append to the workbook a sheet with the rows of ``sheet`` missing from ``keys``.

        ``need_copy_data`` arrives pre-seeded with the header row number and
        is extended in place with each data row whose ID is not in ``keys``.
        """
        id_column = title_dict["证件号码"]
        # The end bound is inclusive: `max_row` is the number of the last
        # row, and a plain range(start_row, max_row) would never examine it.
        for i in range(start_row, max_row + 1):
            id_number = sheet[f"{id_column}{i}"].value
            if id_number is None:
                # Rows without an ID cannot be matched; skip them.
                continue
            if id_number not in keys:
                need_copy_data.append(i)

        # Rows are copied one by one into a fresh sheet. An alternative
        # "copy the sheet, then delete matching rows" strategy was abandoned
        # because row deletion took too long.
        new = workbook.create_sheet("仅" + sheet.title + "有的数据")
        for i, row in enumerate(need_copy_data):
            print(f"开始写入{i}行")
            for j, c in enumerate(sheet[row]):
                new.cell(i + 1, j + 1, c.value)

        return need_copy_data

    # The file name is client-controlled. Keep only its last path component
    # so a name such as "../../x" cannot write outside the scratch directory,
    # and fall back to a random name when nothing usable remains.
    upload_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if not upload_name:
        upload_name = uuid.uuid4().hex + ".xlsx"
    savename = os.path.join(cur_cache_path, upload_name)

    contents = file.file.read()

    with open(savename, "wb") as f:
        f.write(contents)

    # Load the workbook and pick out the two sheets to compare.
    workbook = load_workbook(savename)

    first = None
    second = None
    for name in workbook.sheetnames:
        print(f"表名为:{name}")
        if name == "省办务工":
            first = workbook[name]
        elif name == "国办务工":
            second = workbook[name]

    if first is None or second is None:
        return {
            "code": 202,
            "msg": "没有找到待处理的 省办务工 和 国办务工 两张表格",
        }

    first_title_row_num = get_title_row(first)
    if first_title_row_num == -1:
        return {"code": 202, "msg": "省办务工没有找到数据"}

    first_title_dict = _title_columns(first, first_title_row_num)
    first_keys = get_all_numbers(first, first_title_row_num + 1, first_title_dict["证件号码"])

    second_title_row_num = get_title_row(second)
    if second_title_row_num == -1:
        return {"code": 202, "msg": "国办务工没有找到数据"}

    second_title_dict = _title_columns(second, second_title_row_num)
    second_keys = get_all_numbers(second, second_title_row_num + 1, second_title_dict["证件号码"])

    # Rows present only in the first sheet, then rows present only in the second.
    generate_diff_data(
        first_title_row_num + 1, first.max_row, first, first_title_dict, second_keys, [first_title_row_num]
    )
    generate_diff_data(
        second_title_row_num + 1,
        second.max_row,
        second,
        second_title_dict,
        first_keys,
        [second_title_row_num],
    )

    workbook.save(savename)

    move_file(savename, target_name)
    print(f"处理完成,目标文件夹{diff_dir}, {target_name}")
|
|
|
+
|
|
|
+
|
|
|
def clean_with_path(dir_path):
    """Remove every entry directly inside ``dir_path`` whose name ends in ``.xlsx``.

    Entries with any other suffix are left untouched. Returns ``None``.
    """
    xlsx_names = [name for name in os.listdir(dir_path) if name.endswith(".xlsx")]
    for name in xlsx_names:
        os.remove(f"{dir_path}/{name}")
|
|
|
+
|
|
|
+
|
|
|
def move_dir(old_path, new_path):
    """Move each entry found directly in ``old_path`` into ``new_path``.

    The directory listing is taken once, before anything is moved. The
    source and destination of every move are printed for tracing.
    """
    # os.listdir yields bare names, so both full paths are rebuilt per entry.
    entries = os.listdir(old_path)
    print(f"old path is {old_path}, new path is {new_path}")
    moves = ((os.path.join(old_path, name), os.path.join(new_path, name)) for name in entries)
    for src, dst in moves:
        print("src:", src)
        print("dst:", dst)
        shutil.move(src, dst)
|
|
|
+
|
|
|
+
|
|
|
def move_file(old_path, new_path):
    """Move ``old_path`` to ``new_path`` with ``shutil.move``; returns ``None``."""
    shutil.move(src=old_path, dst=new_path)
|
|
|
+
|
|
|
+
|
|
|
@app.get("/python_api/test")
def test():
    """Smoke-test endpoint: block for five seconds, then report success.

    A message is printed before and after the pause.
    """
    pause_seconds = 5
    print("准备睡眠")
    time.sleep(pause_seconds)
    print("执行完成")
    response = {"code": 200, "message": "成功"}
    return response
|
|
|
+
|
|
|
+
|
|
|
@app.get("/python_api/is_exist")
def is_exist(file_name: str):
    """Report whether ``file_name`` exists in one of the served cache directories.

    The diff directory is checked first, then the shared download directory.
    "Not found" is returned only after both have been checked.

    Args:
        file_name: Name supplied by the client in the query string.

    Returns:
        ``{"code": 200, "exists": True, "filePath": "<dir>/<file_name>"}`` for
        the first directory containing the entry, otherwise
        ``{"code": 200, "exists": False}``. An empty name, or one that
        resolves outside the directory being checked, is never reported as
        existing.
    """
    print(f"查询file{file_name}是否存在")
    for directory in (f"{diff_dir}/", f"{shared_dir}/"):
        root = os.path.abspath(directory)
        candidate = os.path.abspath(os.path.join(root, file_name))
        # file_name is untrusted. An absolute path or ".." segments would
        # otherwise let a client probe arbitrary paths on the host.
        if not candidate.startswith(root + os.sep):
            continue
        if os.path.exists(candidate):
            return {"code": 200, "exists": True, "filePath": f"{directory}" + file_name}
    return {"code": 200, "exists": False}
|
|
|
+
|
|
|
+
|
|
|
@app.post("/python_api/upload_diff_file")
def diff_file(file: UploadFile, background_tasks: BackgroundTasks):
    """Accept a workbook upload and schedule the sheet comparison in the background.

    A random ``.xlsx`` name inside the diff directory is chosen as the result
    path and returned immediately, so the client can look for the file later.

    Args:
        file: The uploaded workbook.
        background_tasks: Task queue that runs after the response is sent.

    Returns:
        ``{"code": 200, "msg": "开始处理", "filePath": <result path>}``.
    """
    # Function-scope imports keep this fix self-contained.
    import io
    import types

    file_name = diff_dir + "/" + uuid.uuid4().hex + ".xlsx"

    # Snapshot the upload while the request is still alive. The UploadFile is
    # request-scoped, and newer FastAPI/Starlette releases may close its
    # temporary file before background tasks run (TODO confirm for the
    # deployed version). The stand-in exposes the two attributes the worker
    # reads: `filename` and a readable `file`.
    snapshot = types.SimpleNamespace(
        filename=file.filename,
        file=io.BytesIO(file.file.read()),
    )

    background_tasks.add_task(deal_diff_data, snapshot, file_name)
    print(f"开始处理{file_name}")
    return {"code": 200, "msg": "开始处理", "filePath": file_name}
|
|
|
+
|
|
|
+
|
|
|
+@app.post("/python_api/uploadfile")
|
|
|
+def create_upload_file(file: UploadFile = File(...)):
|
|
|
print(f"开始处理{file.filename}")
|
|
|
- contents = await file.read()
|
|
|
+ # clean_with_path(f"{shared_dir}/")
|
|
|
+ contents = file.file.read()
|
|
|
|
|
|
- savename = "download_cache/" + file.filename
|
|
|
- # savename = "uploadfile/" + file.filename
|
|
|
+ savename = f"{shared_dir}/" + file.filename
|
|
|
+ if file.filename.endswith("xlsx"):
|
|
|
+ savename = f"{shared_dir}/" + uuid.uuid4().hex + ".xlsx"
|
|
|
with open(savename, "wb") as f:
|
|
|
f.write(contents)
|
|
|
# 读取excel表
|
|
@@ -69,20 +268,15 @@ async def create_upload_file(file: UploadFile = File(...)):
|
|
|
x, y = coordinate_from_string(title_cell.coordinate)
|
|
|
title_dict[title_cell.value] = x
|
|
|
|
|
|
- # print(title_dict)
|
|
|
-
|
|
|
# 开始读取表格内容
|
|
|
read_data(sheet, title_row_num + 1, sheet.max_row, title_dict)
|
|
|
|
|
|
# 保存文档
|
|
|
workbook.save(savename)
|
|
|
|
|
|
- # return FileResponse(savename, media_type="application/octet-stream", filename="deal.xlsx")
|
|
|
- # return FileResponse(savename)
|
|
|
- # return FileResponse(savename, media_type='application/xlsx', filename="deal.xlsx")
|
|
|
- # return savename
|
|
|
print(f"处理完了{file.filename}文件")
|
|
|
- return {"code": 200, "msg": "分析完成,请点击下载查看分析结果", "filePath": f"/{shared_dir}/" + file.filename}
|
|
|
+
|
|
|
+ return {"code": 200, "msg": "分析完成,请点击下载查看分析结果", "filePath": savename}
|
|
|
|
|
|
|
|
|
def calculate_age_from_id_number(id_number):
|
|
@@ -244,7 +438,6 @@ def check_risk_type(ws, row_num, title_dict):
|
|
|
if risk is not None and len(risk) > 0:
|
|
|
risks.append((risk, i))
|
|
|
|
|
|
-
|
|
|
# 定义:健康帮扶,"综合保障,社会帮扶,义务教育保障, 教育帮扶, 住房安全保障, 搬迁, 饮水安全保障, 产业帮扶, 就业帮扶, 金融帮扶, 公益岗位帮扶等常量
|
|
|
HEALTH_SUPPORT = "健康帮扶"
|
|
|
COMPREHENSIVE_GUARANTEE = "综合保障"
|
|
@@ -275,7 +468,7 @@ def check_risk_type(ws, row_num, title_dict):
|
|
|
forbinddens_option = [
|
|
|
HOUSING_SECURITY_GUARANTEE,
|
|
|
DRINKING_WATER_SECURITY_GUARANTEE,
|
|
|
- EDUCATION_SUPPORT,
|
|
|
+ HEALTH_SUPPORT,
|
|
|
]
|
|
|
elif risk == "因残":
|
|
|
must_selected_option = [COMPREHENSIVE_GUARANTEE, SOCIAL_SUPPORT]
|
|
@@ -374,30 +567,12 @@ def check_assistance(ws, row_num, title_dict):
|
|
|
target = ws[f"{title_dict[type]}{row_num}"].value
|
|
|
for key in ["其他", "技能培训"]:
|
|
|
if key in target:
|
|
|
- comment_and_fill_yellow_for(target, f"实施开发式帮扶填写状态下,{type} 不允许选择 {key}")
|
|
|
+ comment_and_fill_yellow_for(
|
|
|
+ ws[f"{title_dict[type]}{row_num}"], f"实施开发式帮扶填写状态下,{type} 不允许选择 {key}"
|
|
|
+ )
|
|
|
return
|
|
|
|
|
|
|
|
|
-# 填写了的话,剩下四项有一个是其他或者技能培训就不行
|
|
|
-
|
|
|
-# info_number = "户主证件号码"
|
|
|
-# identitycard = ws[f"{title_dict[info_number]}{row_num}"].value
|
|
|
-# if len(identitycard) not in [15, 18, 20, 22]:
|
|
|
-# target = ws[f"{title_dict[info_number]}{row_num}"]
|
|
|
-# comment_and_fill_yellow_for(target, "31.监测对象家庭成员证件号码位数异常(证件号码非15、18、20、22位)")
|
|
|
-
|
|
|
-
|
|
|
-# def get_item_values_for(ws, row_num, title_dict, items):
|
|
|
-# result = []
|
|
|
-# for item in items:
|
|
|
-# if item not in title_dict:
|
|
|
-# continue
|
|
|
-# value = ws[f"{title_dict[item]}{row_num}"].value
|
|
|
-# if value is not None:
|
|
|
-# result.append(value)
|
|
|
-# return result
|
|
|
-
|
|
|
-
|
|
|
def comment_and_fill_yellow_for(target, comment):
|
|
|
target.comment = Comment(text=comment, author="system")
|
|
|
yellow_fill = PatternFill(patternType="solid", fgColor="FFFF00")
|
|
@@ -405,6 +580,4 @@ def comment_and_fill_yellow_for(target, comment):
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the server for the "data_verification:app" import string on all
    # interfaces, port 8500, with auto-reload enabled.
    uvicorn.run(
        "data_verification:app",
        host="0.0.0.0",
        port=8500,
        reload=True,
    )
|