浏览代码

解决导入时表头多行问题

zhouxingyu 1 周之前
父节点
当前提交
05273112b1

+ 2 - 4
srm-module-code/src/main/java/org/jeecg/modules/saleCode/service/impl/SaleInterfaceSyncServiceImpl.java

@@ -619,8 +619,7 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         saleInterfaceSync.setReferenceNumber(PDFTableReader.getFields(file, "Request For Quote No.", "", true).get(0));
         saleInterfaceSync.setVesselImo(PDFTableReader.getNextLineFields(file, "IMO Number").get(0));
         saleInterfaceSync.setVesselCode(PDFTableReader.getFields(file, "Vessel Name", "", true).get(0).toUpperCase());
-        saleInterfaceSync.setSubject(
-                PDFTableReader.getFields(file, "Order Title", "Priority", true).get(0));
+        saleInterfaceSync.setSubject(PDFTableReader.getLineByTitle(file, "Request For Quote No", "Order Title", "Priority", "Spare Part Type"));
         String[] removeLine = {"Page", "Request For Quotation", "CSL Australia", "vessel m/v CSL RELIANCE", "Shipsure Version", "Notes"};
         saleInterfaceSync.setComment(Strings.join(PDFTableReader.getMultipleLineFields(file, "Supplier Notes", "Terms and Conditions", removeLine, true), ' '));
 
@@ -979,8 +978,7 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         if (matcher.find()) {
             saleInterfaceSync.setBuyerEmail(matcher.group(1));
         }
-        saleInterfaceSync.setSubject(
-                PDFTableReader.getFields(file, "Order Title", "Priority", true).get(0));
+        saleInterfaceSync.setSubject(PDFTableReader.getLineByTitle(file, "Request For Quote No", "Order Title", "Priority", "Spare Part Type"));
         saleInterfaceSync.setBuyerTelephone(PDFTableReader.getFields(file, "Tel.", "", true).get(0));
         saleInterfaceSync.setReferenceNumber(PDFTableReader.getFields(file, "Request For Quote No.", "", true).get(0));
         saleInterfaceSync.setVesselImo(PDFTableReader.getNextLineFields(file, "IMO Number").get(0));

+ 219 - 0
srm-module-code/src/main/java/org/jeecg/modules/saleCode/util/PDFTableReader.java

@@ -2,6 +2,7 @@ package org.jeecg.modules.saleCode.util;
 
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
+import com.sun.org.apache.xpath.internal.operations.Bool;
 import lombok.AllArgsConstructor;
 import lombok.Getter;
 import org.apache.logging.log4j.util.Strings;
@@ -89,6 +90,224 @@ public class PDFTableReader<T> {
         return new PDFTableReader<T>(pdfFile, beanClass);
     }
 
+    public static PdfTable initTable(MultipartFile file, String standardX) {
+        PdfTable pdfTable = new PdfTable();
+        Map<Float, PdfRow> pdfRowMap = new HashMap<>();
+        try {
+            InputStream inputStream = file.getInputStream();
+            PDDocument document = PDDocument.load(inputStream);
+            Map<Float, PdfRow> headerRows = new HashMap<>();
+            PDFTextStripper textStripper = new PDFTextStripper() {
+                private int pageNumber = 0;
+                float width = 0;
+                float height = 0;
+                float standardXPosition = 0;
+                float rowY = 0;
+
+                @Override
+                protected void writePage() throws IOException {
+                    pageNumber++;
+                    PDPage page = document.getPage(pageNumber - 1);
+                    width = page.getMediaBox().getWidth();
+                    height = page.getMediaBox().getHeight();
+                    pdfTable.setWidth(width);
+                    pdfTable.setHeight(height);
+                    super.writePage();
+                }
+
+                @Override
+                protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
+                    //设置左侧空白偏移量
+                    if (text.contains(standardX)) {
+                        pdfTable.setStandardPosition(textPositions.get(0).getX());
+                    }
+
+                    //初始化每个字段的行高,宽度,对于间隔超过3个字符宽度的字段自动拆分重组为新字段(x-前一个字段的endx值大于3的,默认为新字段)
+                    //上下高度小于3的可视为一行
+                    List<PdfTextPosition> cellPosition = new ArrayList<>();
+                    Float preCharEndX = null;
+                    PdfRow pdfRow;
+                    StringBuffer textBuffer = new StringBuffer();
+                    for(int i = 0; i < textPositions.size(); i ++) {
+                        PdfTextPosition position = new PdfTextPosition();
+                        TextPosition textPosition = textPositions.get(i);
+                        position.setX(textPosition.getX());
+                        position.setEndX(textPosition.getEndX());
+                        position.setY(textPosition.getY() + (pageNumber - 1) * height);
+                        position.setPageNum(pageNumber);
+                        position.setFont(textPosition.getFont());
+                        position.setFontSize(textPosition.getFontSize());
+                        position.setUnicode(textPosition.getUnicode());
+                        if(preCharEndX != null) {
+                            //超过3个字符宽度的字段自动拆分重组为新字段
+                            if(position.getX() - preCharEndX > 3) {
+                                PdfCell pdfCell = new PdfCell();
+                                pdfCell.setPositions(cellPosition);
+                                pdfCell.setCellY(cellPosition.get(0).getY());
+                                pdfCell.setText(String.valueOf(textBuffer));
+                                pdfCell.setCellStartX(cellPosition.get(0).getX());
+                                pdfCell.setCellEndX(cellPosition.get(cellPosition.size() - 1).getEndX());
+                                pdfRow = pdfRowMap.get(pdfCell.getCellY()) == null ? new PdfRow() : pdfRowMap.get(pdfCell.getCellY());
+                                List<PdfCell> cell = pdfRow.getCell() == null ? new ArrayList<>() : pdfRow.getCell();
+                                cell.add(pdfCell);
+                                pdfRow.setCell(cell);
+                                pdfRowMap.put(pdfCell.getCellY(), pdfRow);
+
+                                textBuffer = new StringBuffer();
+                                cellPosition = new ArrayList<>();
+                            }
+                        }
+                        textBuffer.append(textPosition.getUnicode());
+                        cellPosition.add(position);
+                        preCharEndX = position.getEndX();
+                        if(i == textPositions.size() - 1) {
+                            PdfCell pdfCell = new PdfCell();
+                            pdfCell.setPositions(cellPosition);
+                            pdfCell.setCellY(cellPosition.get(0).getY());
+                            pdfCell.setText(String.valueOf(textBuffer));
+                            pdfCell.setCellStartX(cellPosition.get(0).getX());
+                            pdfCell.setCellEndX(cellPosition.get(cellPosition.size() - 1).getEndX());
+                            pdfRow = pdfRowMap.get(pdfCell.getCellY()) == null ? new PdfRow() : pdfRowMap.get(pdfCell.getCellY());
+                            List<PdfCell> cell = pdfRow.getCell() == null ? new ArrayList<>() : pdfRow.getCell();
+                            cell.add(pdfCell);
+                            pdfRow.setCell(cell);
+                            pdfRowMap.put(pdfCell.getCellY(), pdfRow);
+                        }
+                    }
+                }
+            };
+
+            textStripper.setSortByPosition(true);
+            textStripper.setStartPage(1);
+            textStripper.setEndPage(document.getNumberOfPages());
+            textStripper.getText(document);
+            document.close();
+            inputStream.close();
+            pdfTable.setPdfRows(pdfRowMap);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+
+        return pdfTable;
+
+    }
+
+
+    /**
+     * Builds the table for the given PDF via {@code initTable} and then runs {@code adjustY} on it.
+     *
+     * @param file      the uploaded PDF
+     * @param standardX marker text forwarded unchanged to {@code initTable}
+     * @return the table after row-height adjustment
+     */
+    public static PdfTable handlePdf(MultipartFile file, String standardX) {
+        final PdfTable table = initTable(file, standardX);
+        // Recalibrate row heights: a row less than 2 below the previous one is merged into it
+        adjustY(table);
+        return table;
+    }
+
+    /**
+     * Extracts the text that belongs to a titled field, which may span several rows.
+     *
+     * <p>The rows taken are those from the row containing {@code title} up to (excluding) the row
+     * containing {@code nextTitle}; without a {@code nextTitle} match only the title row is used.
+     * Within those rows, only cells starting at or right of the title cell and ending at or left
+     * of the {@code afterTitle} cell are concatenated. The title text itself is removed from the
+     * result.
+     *
+     * <p>NOTE(review): when a marker text occurs in more than one cell, the occurrence that wins
+     * depends on the iteration order of the row map - confirm markers are unique per document.
+     *
+     * @param file       the uploaded PDF
+     * @param standardX  marker text forwarded to {@code handlePdf}
+     * @param title      text identifying the field's title cell
+     * @param afterTitle text of a cell acting as the right-hand boundary, or {@code null} for none
+     * @param nextTitle  text of a cell marking the first row after the field, or {@code null}
+     * @return the trimmed field content, or an empty string when the title is not found
+     */
+    public static String getLineByTitle(MultipartFile file, String standardX, String title, String afterTitle, String nextTitle) {
+        PdfTable pdfTable = handlePdf(file, standardX);
+        Map<Float, PdfRow> pdfRows = pdfTable.getPdfRows();
+        if (title == null || pdfRows == null) {
+            return "";
+        }
+
+        Float titleRow = null;
+        Float nextRow = null;
+        Float titleStartX = null;
+        Float afterTitleX = null;
+        for (Map.Entry<Float, PdfRow> entry : pdfRows.entrySet()) {
+            PdfRow pdfRow = entry.getValue();
+            // rows merged into a neighbour are left behind with a null value
+            if (pdfRow == null || pdfRow.getCell() == null) {
+                continue;
+            }
+            for (PdfCell pdfCell : pdfRow.getCell()) {
+                String cellText = pdfCell.getText();
+                // Row and left edge of the title cell
+                if (cellText.contains(title)) {
+                    titleRow = entry.getKey();
+                    titleStartX = pdfCell.getCellStartX();
+                }
+                // For multi-row content: the row at which collection stops
+                if (nextTitle != null && cellText.contains(nextTitle)) {
+                    nextRow = entry.getKey();
+                }
+                // Left edge of the cell that bounds the content on the right
+                if (afterTitle != null && cellText.contains(afterTitle)) {
+                    afterTitleX = pdfCell.getCellStartX();
+                }
+            }
+        }
+        if (titleRow == null) {
+            return "";
+        }
+
+        final float firstRow = titleRow;
+        final float startX = titleStartX;
+        // Without a right-hand boundary every cell right of the title qualifies
+        final float endLimit = afterTitleX == null ? Float.MAX_VALUE : afterTitleX;
+
+        // Collect the row keys from the title row up to, but excluding, the next-title row
+        List<Float> rowKeys = new ArrayList<>();
+        if (nextRow != null) {
+            final float stopRow = nextRow;
+            rowKeys = pdfRows.keySet().stream()
+                    .filter(y -> y >= firstRow && y < stopRow)
+                    .sorted()
+                    .collect(Collectors.toList());
+        } else {
+            rowKeys.add(titleRow);
+        }
+
+        StringBuilder result = new StringBuilder();
+        for (Float rowKey : rowKeys) {
+            PdfRow pdfRow = pdfRows.get(rowKey);
+            if (pdfRow == null || pdfRow.getCell() == null) {
+                continue;
+            }
+            for (PdfCell cell : pdfRow.getCell()) {
+                if (cell.getCellStartX() >= startX && cell.getCellEndX() <= endLimit) {
+                    result.append(cell.getText());
+                }
+            }
+        }
+        // literal replace: the title is plain text, not a regular expression
+        return result.toString().replace(title, "").trim();
+    }
+
+    /**
+     * Merges rows whose Y values are almost equal.
+     *
+     * <p>Row keys are visited in ascending order. A row lying less than 2 below the current
+     * anchor row has its cells appended to the anchor row, which is then re-sorted by cell
+     * start X; the merged row's map entry is kept but set to {@code null}. Any other row becomes
+     * the new anchor.
+     *
+     * @param pdfTable the table whose row map is adjusted in place
+     */
+    private static void adjustY(PdfTable pdfTable) {
+        Map<Float, PdfRow> pdfRows = pdfTable.getPdfRows();
+        if (pdfRows == null || pdfRows.isEmpty()) {
+            return;
+        }
+        List<Float> sortKeys = pdfRows.keySet().stream().sorted().collect(Collectors.toList());
+        Float preY = null;
+        for (Float key : sortKeys) {
+            PdfRow current = pdfRows.get(key);
+            if (current == null) {
+                // already merged into another row
+                continue;
+            }
+            if (preY != null && key - preY > 0 && key - preY < 2) {
+                PdfRow anchor = pdfRows.get(preY);
+                List<PdfCell> anchorCells = anchor.getCell();
+                anchorCells.addAll(current.getCell());
+                pdfRows.put(key, null);
+                // Float.compare keeps the ordering consistent; truncating the difference to int
+                // would report cells less than 1 apart as equal and break the Comparator contract
+                anchorCells.sort((o1, o2) -> Float.compare(o1.getCellStartX(), o2.getCellStartX()));
+                anchor.setCell(anchorCells);
+            } else {
+                preY = key;
+            }
+        }
+
+        pdfTable.setPdfRows(pdfRows);
+    }
+
     /**
      * 设置PDF表格标题所在行号
      *

+ 2 - 0
srm-module-code/src/main/java/org/jeecg/modules/saleCode/vo/PdfTextPosition.java

@@ -11,6 +11,8 @@ import org.apache.pdfbox.pdmodel.font.PDFont;
 public class PdfTextPosition {
     float X;
     float Y;
+    float endX;
+    float height;
     String unicode;
     PDFont font;
     float fontSize;