Browse Source

修复一处多添加的规则

root 1 year ago
parent
commit
207fd23d15
1 changed files with 213 additions and 40 deletions
  1. 213 40
      data_verification.py

+ 213 - 40
data_verification.py

@@ -1,7 +1,7 @@
 import uvicorn
 import warnings
 import os
-from fastapi import FastAPI, UploadFile, File
+from fastapi import FastAPI, UploadFile, File, BackgroundTasks, routing
 from openpyxl import load_workbook
 from openpyxl.utils.cell import coordinate_from_string
 from openpyxl.comments import Comment
@@ -10,6 +10,12 @@ from fastapi.middleware.cors import CORSMiddleware
 from datetime import datetime
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
+import asyncio
+from concurrent.futures.process import ProcessPoolExecutor
+from fastapi.responses import StreamingResponse
+import shutil
+import uuid
+import time
 
 warnings.filterwarnings("ignore")
 
@@ -24,18 +30,211 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-shared_dir = 'cache'
-app.mount(f"/{shared_dir}", StaticFiles(directory="download_cache"), name={shared_dir})
+# Directory where validated workbooks are written; it is also served as
+# static files under the URL prefix of the same name.
+shared_dir = "download_cache"
+# The mount name has to be a plain string. The previous `name={shared_dir}`
+# was a set literal, not string interpolation.
+app.mount(f"/{shared_dir}", StaticFiles(directory=shared_dir), name=shared_dir)
 
+# Directory where sheet-comparison results are moved; served the same way.
+diff_dir = "diff_cache"
+app.mount(f"/{diff_dir}", StaticFiles(directory=diff_dir), name=diff_dir)
 
+# Scratch directory for uploads that are still being processed. The trailing
+# slash matters because the file name is appended by plain concatenation.
+cur_cache_path = "cur_cache/"
 
-@app.post("/uploadfile")
-async def create_upload_file(file: UploadFile = File(...)):
+
+def get_title_row(sheet):
+    """Find the header row among the rows returned by ``sheet[1:5]``.
+
+    Every inspected cell is echoed to stdout. A cell marks its row as the
+    header when its value equals one of the known column titles. When several
+    cells match, the row of the last match is returned.
+
+    Returns:
+        The ``row`` attribute of the last matching cell, or ``-1`` if no cell
+        matched.
+    """
+    header_markers = ("证件号码", "收入(元)", "务工月收入")
+    found_row = -1
+    for row_idx, cells in enumerate(sheet[1:5]):
+        for col_idx, cell in enumerate(cells):
+            print(f"第{row_idx + 1}行,第{col_idx}列,值:{cell.value}")
+            if cell.value in header_markers:
+                found_row = cell.row
+    return found_row
+
+
+def get_all_numbers(sheet, start_row, cow):
+    """Map each value in column ``cow`` to the row number it was read from.
+
+    Reading starts at ``start_row`` and stops at the first empty cell, or once
+    the row number reaches ``sheet.max_row * 2`` (evaluated once, before the
+    first read). If a value occurs more than once, the later row wins.
+    """
+    row_by_number = {}
+    row_limit = sheet.max_row * 2
+    row = start_row
+    while row < row_limit:
+        value = sheet[f"{cow}{row}"].value
+        if value is None:
+            break
+        row_by_number[value] = row
+        row += 1
+    return row_by_number
+
+
+def deal_diff_data(file: UploadFile = File(...), target_name: str = None):
+    """Compare the "省办务工" and "国办务工" sheets of an uploaded workbook.
+
+    The upload is written into ``cur_cache_path`` and loaded. For each of the
+    two sheets the header row and the "证件号码" column are located, and a new
+    worksheet named "仅<sheet title>有的数据" is appended. That worksheet holds
+    the header row plus every row whose ID number does not occur in the other
+    sheet. The workbook is then saved and moved to ``target_name``.
+
+    Args:
+        file: The uploaded workbook.
+        target_name: Destination path for the processed workbook.
+
+    Returns:
+        ``None`` on success, or a ``{"code": 202, "msg": ...}`` dict when a
+        required sheet, header row or ID column cannot be found.
+    """
+    print("开始处理")
+
+    id_title = "证件号码"
+
+    def read_titles(sheet):
+        """Return ``(header row number, {title: column letter})`` for ``sheet``.
+
+        The row number is ``-1`` (with an empty dict) when no header is found.
+        """
+        title_row_num = get_title_row(sheet)
+        if title_row_num == -1:
+            return -1, {}
+        title_dict = {}
+        for title_cell in sheet[title_row_num]:
+            column_letter, _ = coordinate_from_string(title_cell.coordinate)
+            title_dict[title_cell.value] = column_letter
+        return title_row_num, title_dict
+
+    def generate_diff_data(start_row, max_row, sheet, title_dict, keys, need_copy_data):
+        """Copy the rows of ``sheet`` whose ID is not in ``keys`` to a new sheet.
+
+        ``need_copy_data`` arrives pre-seeded with the header row number and is
+        extended in place with the number of every unmatched row.
+        """
+        id_column = title_dict[id_title]
+        # Callers pass the sheet's max_row, which is itself a row to inspect,
+        # so the upper bound has to be inclusive.
+        for i in range(start_row, max_row + 1):
+            id_number = sheet[f"{id_column}{i}"].value
+            # Rows without an ID cannot be compared; skip them.
+            if id_number is None:
+                continue
+            if id_number not in keys:
+                need_copy_data.append(i)
+
+        # Copying the whole sheet and deleting the matched rows was tried and
+        # abandoned as too slow, so unmatched rows are always written one by one.
+        new = workbook.create_sheet("仅" + sheet.title + "有的数据")
+        for i, row in enumerate(need_copy_data):
+            print(f"开始写入{i}行")
+            for j, c in enumerate(sheet[row]):
+                new.cell(i + 1, j + 1, c.value)
+
+        return need_copy_data
+
+    # Keep only the final path component so a crafted upload name cannot
+    # place the file outside the cache directory.
+    savename = cur_cache_path + os.path.basename(file.filename)
+
+    contents = file.file.read()
+
+    with open(savename, "wb") as f:
+        f.write(contents)
+
+    # Load the workbook and pick out the two sheets to compare.
+    workbook = load_workbook(savename)
+
+    first = None
+    second = None
+    for name in workbook.sheetnames:
+        print(f"表名为:{name}")
+        if name == "省办务工":
+            first = workbook[name]
+        elif name == "国办务工":
+            second = workbook[name]
+
+    if first is None or second is None:
+        return {
+            "code": 202,
+            "msg": "没有找到待处理的 省办务工 和 国办务工 两张表格",
+        }
+
+    first_title_row_num, first_title_dict = read_titles(first)
+    if first_title_row_num == -1:
+        return {"code": 202, "msg": "省办务工没有找到数据"}
+    # The header row can be recognised through another title, so the ID
+    # column still has to be checked before it is used.
+    if id_title not in first_title_dict:
+        return {"code": 202, "msg": "省办务工没有找到证件号码列"}
+
+    first_keys = get_all_numbers(first, first_title_row_num + 1, first_title_dict[id_title])
+
+    second_title_row_num, second_title_dict = read_titles(second)
+    if second_title_row_num == -1:
+        return {"code": 202, "msg": "国办务工没有找到数据"}
+    if id_title not in second_title_dict:
+        return {"code": 202, "msg": "国办务工没有找到证件号码列"}
+
+    second_keys = get_all_numbers(second, second_title_row_num + 1, second_title_dict[id_title])
+
+    # Each sheet is diffed against the ID set of the other one.
+    generate_diff_data(
+        first_title_row_num + 1,
+        first.max_row,
+        first,
+        first_title_dict,
+        second_keys,
+        [first_title_row_num],
+    )
+
+    generate_diff_data(
+        second_title_row_num + 1,
+        second.max_row,
+        second,
+        second_title_dict,
+        first_keys,
+        [second_title_row_num],
+    )
+
+    workbook.save(savename)
+
+    move_file(savename, target_name)
+    print(f"处理完成,目标文件夹{diff_dir}, {target_name}")
+
+
+def clean_with_path(dir_path):
+    """Delete every entry of ``dir_path`` whose name ends with ``.xlsx``."""
+    for entry in os.listdir(dir_path):
+        # Anything that is not a workbook is left in place.
+        if not entry.endswith(".xlsx"):
+            continue
+        os.remove(f"{dir_path}/{entry}")
+
+
+def move_dir(old_path, new_path):
+    """Move every entry of ``old_path`` into ``new_path``, logging each move."""
+    # os.listdir yields bare names, so both full paths are rebuilt per entry.
+    names = os.listdir(old_path)
+    print(f"old path is {old_path}, new path is {new_path}")
+    for name in names:
+        src, dst = os.path.join(old_path, name), os.path.join(new_path, name)
+        print("src:", src)
+        print("dst:", dst)
+        shutil.move(src, dst)
+
+
+def move_file(old_path, new_path):
+    """Move ``old_path`` to ``new_path`` with ``shutil.move``; returns ``None``."""
+    shutil.move(old_path, new_path)
+
+
+@app.get("/python_api/test")
+def test():
+    """Diagnostic endpoint: sleep for five seconds, then report success."""
+    # move_file(cur_cache_path + "123.xlsx", diff_dir + "/" + uuid.uuid4().hex + ".xlsx")
+    print("准备睡眠")
+    # Blocking sleep; the surrounding prints mark its start and end.
+    time.sleep(5)
+    print("执行完成")
+    return {"code": 200, "message": "成功"}
+
+
+@app.get("/python_api/is_exist")
+def is_exist(file_name: str):
+    """Report whether ``file_name`` exists in one of the served cache directories.
+
+    ``diff_dir`` is checked first, then ``shared_dir``.
+
+    Returns:
+        ``{"code": 200, "exists": True, "filePath": "<dir>/<name>"}`` for the
+        first directory that contains the file, otherwise
+        ``{"code": 200, "exists": False}``.
+    """
+    print(f"查询file{file_name}是否存在")
+    # Keep only the last path component so the lookup cannot be steered
+    # outside the two cache directories.
+    safe_name = os.path.basename(file_name)
+    if not safe_name:
+        return {"code": 200, "exists": False}
+    for cache_dir in (f"{diff_dir}/", f"{shared_dir}/"):
+        if os.path.isfile(os.path.join(cache_dir, safe_name)):
+            return {"code": 200, "exists": True, "filePath": cache_dir + safe_name}
+    # Report a miss only after every directory has been tried. Returning from
+    # an else branch inside the loop meant the second directory was never checked.
+    return {"code": 200, "exists": False}
+
+
+@app.post("/python_api/upload_diff_file")
+def diff_file(file: UploadFile, background_tasks: BackgroundTasks):
+    """Queue an uploaded workbook for sheet comparison and answer immediately.
+
+    A random ``.xlsx`` name under ``diff_dir`` is chosen as the result path,
+    ``deal_diff_data`` is registered as a background task with the upload and
+    that path, and the path is returned in the response.
+    """
+    # NOTE(review): the upload is only read later, inside the background
+    # task - confirm it is still open at that point with the installed
+    # FastAPI/Starlette version.
+    result_path = f"{diff_dir}/{uuid.uuid4().hex}.xlsx"
+    print(f"开始处理{result_path}")
+    background_tasks.add_task(deal_diff_data, file, result_path)
+    return {"code": 200, "msg": "开始处理", "filePath": result_path}
+
+
+@app.post("/python_api/uploadfile")
+def create_upload_file(file: UploadFile = File(...)):
     print(f"开始处理{file.filename}")
-    contents = await file.read()
+    # clean_with_path(f"{shared_dir}/")
+    contents = file.file.read()
 
-    savename = "download_cache/" + file.filename
-    # savename = "uploadfile/" + file.filename
+    savename = f"{shared_dir}/" + file.filename
+    if file.filename.endswith("xlsx"):
+        savename = f"{shared_dir}/" + uuid.uuid4().hex + ".xlsx"
     with open(savename, "wb") as f:
         f.write(contents)
     # 读取excel表
@@ -69,20 +268,15 @@ async def create_upload_file(file: UploadFile = File(...)):
         x, y = coordinate_from_string(title_cell.coordinate)
         title_dict[title_cell.value] = x
 
-    # print(title_dict)
-
     # 开始读取表格内容
     read_data(sheet, title_row_num + 1, sheet.max_row, title_dict)
 
     # 保存文档
     workbook.save(savename)
 
-    # return FileResponse(savename, media_type="application/octet-stream", filename="deal.xlsx")
-    # return FileResponse(savename)
-    # return FileResponse(savename, media_type='application/xlsx', filename="deal.xlsx")
-    # return savename
     print(f"处理完了{file.filename}文件")
-    return {"code": 200, "msg": "分析完成,请点击下载查看分析结果", "filePath": f"/{shared_dir}/" + file.filename}
+
+    return {"code": 200, "msg": "分析完成,请点击下载查看分析结果", "filePath": savename}
 
 
 def calculate_age_from_id_number(id_number):
@@ -244,7 +438,6 @@ def check_risk_type(ws, row_num, title_dict):
         if risk is not None and len(risk) > 0:
             risks.append((risk, i))
 
-
     # 定义:健康帮扶,"综合保障,社会帮扶,义务教育保障, 教育帮扶, 住房安全保障, 搬迁, 饮水安全保障, 产业帮扶, 就业帮扶, 金融帮扶, 公益岗位帮扶等常量
     HEALTH_SUPPORT = "健康帮扶"
     COMPREHENSIVE_GUARANTEE = "综合保障"
@@ -275,7 +468,7 @@ def check_risk_type(ws, row_num, title_dict):
             forbinddens_option = [
                 HOUSING_SECURITY_GUARANTEE,
                 DRINKING_WATER_SECURITY_GUARANTEE,
-                EDUCATION_SUPPORT,
+                HEALTH_SUPPORT,
             ]
         elif risk == "因残":
             must_selected_option = [COMPREHENSIVE_GUARANTEE, SOCIAL_SUPPORT]
@@ -374,30 +567,12 @@ def check_assistance(ws, row_num, title_dict):
             target = ws[f"{title_dict[type]}{row_num}"].value
             for key in ["其他", "技能培训"]:
                 if key in target:
-                    comment_and_fill_yellow_for(target, f"实施开发式帮扶填写状态下,{type} 不允许选择 {key}")
+                    comment_and_fill_yellow_for(
+                        ws[f"{title_dict[type]}{row_num}"], f"实施开发式帮扶填写状态下,{type} 不允许选择 {key}"
+                    )
                     return
 
 
-# 填写了的话,剩下四项有一个是其他或者技能培训就不行
-
-# info_number = "户主证件号码"
-# identitycard = ws[f"{title_dict[info_number]}{row_num}"].value
-# if len(identitycard) not in [15, 18, 20, 22]:
-#     target = ws[f"{title_dict[info_number]}{row_num}"]
-#     comment_and_fill_yellow_for(target, "31.监测对象家庭成员证件号码位数异常(证件号码非15、18、20、22位)")
-
-
-# def get_item_values_for(ws, row_num, title_dict, items):
-#     result = []
-#     for item in items:
-#         if item not in title_dict:
-#             continue
-#         value = ws[f"{title_dict[item]}{row_num}"].value
-#         if value is not None:
-#             result.append(value)
-#     return result
-
-
 def comment_and_fill_yellow_for(target, comment):
     target.comment = Comment(text=comment, author="system")
     yellow_fill = PatternFill(patternType="solid", fgColor="FFFF00")
@@ -405,6 +580,4 @@ def comment_and_fill_yellow_for(target, comment):
 
 
 if __name__ == "__main__":
-    # result = calculate_age_from_id_number("532801200607144126")
-    # print(result)
     uvicorn.run("data_verification:app", host="0.0.0.0", port=8500, reload=True)