Ver código fonte

pdf导入修复

zhouxingyu 1 semana atrás
pai
commit
f914ae92b1

+ 23 - 11
srm-module-code/src/main/java/org/jeecg/modules/saleCode/service/impl/SaleInterfaceSyncServiceImpl.java

@@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.JsonObject;
 import io.micrometer.core.instrument.util.StringUtils;
 import io.swagger.annotations.ApiModelProperty;
+import org.apache.logging.log4j.util.Strings;
 import org.apache.shiro.SecurityUtils;
 import org.apache.shiro.SecurityUtils;
 import org.jeecg.common.api.vo.Result;
@@ -35,6 +36,7 @@ import org.jeecg.modules.saleCode.mapper.SaleInterfaceItemMapper;
 import org.jeecg.modules.saleCode.mapper.SaleInterfaceSyncMapper;
 import org.jeecg.modules.saleCode.service.ISaleInterfaceSyncService;
 import org.jeecg.modules.saleCode.util.HttpUtils;
+import org.jeecg.modules.saleCode.util.MonthUtil;
 import org.jeecg.modules.saleCode.util.PDFTableReader;
 import org.jeecg.modules.system.mapper.SysDictMapper;
 import org.jeecgframework.poi.excel.annotation.Excel;
@@ -49,6 +51,7 @@ import java.math.BigDecimal;
 import java.text.SimpleDateFormat;
 import java.text.ParseException;
 import java.time.Instant;
+import java.time.Month;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -599,7 +602,7 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         saleInterfaceSync.setBuyerName(split[1]);
         try {
             String[] date = split[0].split(" ");
-            String day = date[2] + "-" + date[1] + "-" + date[0];
+            String day = date[2] + "-" + MonthUtil.getMonthValue(date[1]) + "-" + date[0];
             saleInterfaceSync.setSubmittedDate(DateUtils.parseDate(day, "yyyy-MM-dd"));
         } catch (ParseException e) {
             e.printStackTrace();
@@ -616,6 +619,10 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         saleInterfaceSync.setReferenceNumber(PDFTableReader.getFields(file, "Request For Quote No.", "", true).get(0));
         saleInterfaceSync.setVesselImo(PDFTableReader.getNextLineFields(file, "IMO Number").get(0));
         saleInterfaceSync.setVesselCode(PDFTableReader.getFields(file, "Vessel Name", "", true).get(0).toUpperCase());
+        saleInterfaceSync.setSubject(
+                PDFTableReader.getFields(file, "Order Title", "Priority", true).get(0));
+        String[] removeLine = {"Page", "Request For Quotation", "CSL Australia", "vessel m/v CSL RELIANCE", "Shipsure Version", "Notes"};
+        saleInterfaceSync.setComment(Strings.join(PDFTableReader.getMultipleLineFields(file, "Supplier Notes", "Terms and Conditions", removeLine, true), ' '));
 
         String[] extraLine = {"For Component", "Order Line Notes", "Page"};
         JSONArray jsonArray = PDFTableReader.getTableByPosition(file, fieldList, ignoreList, "Supplier Notes", -1, "mediate", "Request For Quote No", "Makers Reference", extraLine, 8, 0);
@@ -631,7 +638,7 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
                 }
                 saleInterfaceItem.setQuantity(jsonObject.getString("Qty"));
                 saleInterfaceItem.setSupplierPartNumber(jsonObject.getString("Makers Reference"));
-                saleInterfaceItem.setUnitOfMeasure(jsonObject.getString("UOM"));
+                saleInterfaceItem.setUnitOfMeasure(jsonObject.getString("UOM") == null ? "" : jsonObject.getString("UOM").toUpperCase());
                 saleInterfaceItem.setHeadId(id);
                 saleInterfaceItem.setComment(jsonObject.getString("Drawing Position"));
                 saleInterfaceItemMapper.insert(saleInterfaceItem);
@@ -653,10 +660,15 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         saleInterfaceSync.setBuyerTelephone(PDFTableReader.getNextLineFields(file, "Phone:").get(0));
         saleInterfaceSync.setReferenceNumber(PDFTableReader.getNextLineFields(file, "Requisition No.:").get(0));
         saleInterfaceSync.setVesselImo(PDFTableReader.getNextLineFields(file, "IMO:").get(0));
-        saleInterfaceSync.setVesselCode(PDFTableReader.getNextLineFields(file, "Vessel:").get(0));
+        saleInterfaceSync.setVesselCode(PDFTableReader.getNextLineFields(file, "Vessel:").get(0).toUpperCase());
         saleInterfaceSync.setCurrencyCode(PDFTableReader.getNextLineFields(file, "Requested Currency:").get(0));
-        saleInterfaceSync.setComment(PDFTableReader.getNextLineFields(file, "Buyer Message").get(0));
+
         try {
+            String[] removeLine = {"Buyer Message"};
+            List<String> multipleLineFields = PDFTableReader.getMultipleLineFields(file, "Buyer Message", "Please provide energy", removeLine, true);
+            if(multipleLineFields != null) {
+                saleInterfaceSync.setComment(Strings.join(multipleLineFields, ' '));
+            }
             String[] date = PDFTableReader.getNextLineFields(file, "Requisition Date:").get(0).split("/");
             String day = date[2] + "-" + date[1] + "-" + date[0];
             saleInterfaceSync.setSubmittedDate(DateUtils.parseDate(day, "yyyy-MM-dd"));
@@ -696,7 +708,7 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
                 saleInterfaceItem.setDescription(jsonObject.getString("Description"));
                 saleInterfaceItem.setQuantity(jsonObject.getString("Quantity"));
                 saleInterfaceItem.setSyncItemCode(jsonObject.getString("Item Code/Part"));
-                saleInterfaceItem.setUnitOfMeasure(jsonObject.getString("UoM"));
+                saleInterfaceItem.setUnitOfMeasure(jsonObject.getString("UoM") == null ? "" : jsonObject.getString("UoM").toUpperCase());
                 saleInterfaceItem.setHeadId(id);
                 saleInterfaceItemMapper.insert(saleInterfaceItem);
             }
@@ -772,9 +784,6 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         saleInterfaceSync.setVesselCode(PDFTableReader.getFields(file, "Vessel  Name", "Quote By", true).get(0));
         saleInterfaceSync.setCurrencyCode(PDFTableReader.getFields(file, "Currency", "", true).get(0));
 
-        List<String> detailNodesPart1 = PDFTableReader.getMultipleLineFields(file, "Item Details", "1 of 2", false);
-        List<String> detailNodesPart2 = PDFTableReader.getMultipleLineFields(file, "Item Details", "Remarks To Vendor :", false);
-
         List<String> fieldList = new ArrayList<>();
         fieldList.add("S.No");
         fieldList.add("Item Code");
@@ -957,7 +966,7 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         saleInterfaceSync.setBuyerName(split[1]);
         try {
             String[] date = split[0].split(" ");
-            String day = date[2] + "-" + date[1] + "-" + date[0];
+            String day = date[2] + "-" + MonthUtil.getMonthValue(date[1]) + "-" + date[0];
             saleInterfaceSync.setSubmittedDate(DateUtils.parseDate(day, "yyyy-MM-dd"));
         } catch (ParseException e) {
             e.printStackTrace();
@@ -970,11 +979,14 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
         if (matcher.find()) {
             saleInterfaceSync.setBuyerEmail(matcher.group(1));
         }
-        saleInterfaceSync.setSubject(PDFTableReader.getFields(file, "Order Title", "", true).get(0));
+        saleInterfaceSync.setSubject(
+                PDFTableReader.getFields(file, "Order Title", "Priority", true).get(0));
         saleInterfaceSync.setBuyerTelephone(PDFTableReader.getFields(file, "Tel.", "", true).get(0));
         saleInterfaceSync.setReferenceNumber(PDFTableReader.getFields(file, "Request For Quote No.", "", true).get(0));
         saleInterfaceSync.setVesselImo(PDFTableReader.getNextLineFields(file, "IMO Number").get(0));
         saleInterfaceSync.setVesselCode(PDFTableReader.getFields(file, "Vessel Name", "", true).get(0).toUpperCase());
+        String[] removeLine = {"Page", "Request For Quotation", "CSL Australia", "vessel m/v CSL RELIANCE", "Shipsure Version", "Notes"};
+        saleInterfaceSync.setComment(Strings.join(PDFTableReader.getMultipleLineFields(file, "Supplier Notes", "Terms and Conditions", removeLine, true), ' '));
 
         String[] extra = {"Page", "Plate", "Order Line Notes"};
         JSONArray jsonArray = PDFTableReader.getTableByPosition(file, fieldList, ignoreList, "Sub Total", -1, "mediate", "Request For Quote No", "Makers Reference", extra, 8, 0);
@@ -990,7 +1002,7 @@ public class SaleInterfaceSyncServiceImpl extends ServiceImpl<SaleInterfaceSyncM
                 }
                 saleInterfaceItem.setQuantity(jsonObject.getString("Qty"));
                 saleInterfaceItem.setSupplierPartNumber(jsonObject.getString("Makers Reference"));
-                saleInterfaceItem.setUnitOfMeasure(jsonObject.getString("UOM"));
+                saleInterfaceItem.setUnitOfMeasure(jsonObject.getString("UOM") == null ? "" : jsonObject.getString("UOM").toUpperCase());
                 saleInterfaceItem.setHeadId(id);
                 saleInterfaceItem.setComment(jsonObject.getString("Drawing Position"));
                 saleInterfaceItemMapper.insert(saleInterfaceItem);

+ 28 - 0
srm-module-code/src/main/java/org/jeecg/modules/saleCode/util/MonthUtil.java

@@ -0,0 +1,28 @@
+package org.jeecg.modules.saleCode.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
/**
 * Translates English month names, as they appear in parsed PDF date strings,
 * into two-digit month numbers ("01" .. "12").
 *
 * <p>Lookups ignore letter case, surrounding whitespace and a trailing dot, and
 * accept both the three-letter abbreviation ("Jan") and the full name
 * ("January"), plus the common four-letter form "Sept".
 */
public class MonthUtil {

    /** English month names in calendar order; index + 1 is the month number. */
    private static final String[] MONTH_NAMES = {
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    };

    /** Lower-cased month name or abbreviation mapped to its two-digit number. */
    private static final Map<String, String> MONTH_NUMBERS = new HashMap<>();

    static {
        for (int i = 0; i < MONTH_NAMES.length; i++) {
            // Built by hand rather than String.format so the digits never depend on the default locale.
            String number = (i < 9 ? "0" : "") + (i + 1);
            String fullName = MONTH_NAMES[i].toLowerCase(java.util.Locale.ROOT);
            MONTH_NUMBERS.put(fullName, number);
            MONTH_NUMBERS.put(fullName.substring(0, 3), number);
        }
        MONTH_NUMBERS.put("sept", "09");
    }

    /**
     * Returns the two-digit month number for an English month name.
     *
     * @param month month abbreviation or full name, e.g. "Jan", "JAN", "January" or "Sept."; may be {@code null}
     * @return the month number as a two-character string ("01" .. "12"), or {@code null}
     *         when {@code month} is {@code null} or is not a recognised month name
     */
    public static String getMonthValue(String month) {
        if (month == null) {
            return null;
        }
        String key = month.trim();
        // Some documents print abbreviations with a trailing dot ("Sep.").
        if (key.endsWith(".")) {
            key = key.substring(0, key.length() - 1);
        }
        return MONTH_NUMBERS.get(key.toLowerCase(java.util.Locale.ROOT));
    }
}

+ 222 - 13
srm-module-code/src/main/java/org/jeecg/modules/saleCode/util/PDFTableReader.java

@@ -28,6 +28,10 @@ import java.util.stream.Collectors;
 public class PDFTableReader<T> {
     // 换行符
     private final static String LINE_WRAP = "\r";
+    private final static String normal = "normal";
+    private final static String tableHead = "tableHead";
+    private final static String tableLine = "tableLine";
+    private final static String tableExtra = "tableExtra";
     /**
      * PDF数据缓冲器
      */
@@ -331,7 +335,74 @@ public class PDFTableReader<T> {
     }
 
 
-    public static List<String> getMultipleLineFields(MultipartFile file, String lineStartText, String lineEndText, boolean sort) {
+
+    public static List<String> getFields(MultipartFile file, String startText, String endText, String cutText, boolean sort) {
+        //获取文档坐标
+        List<String> result = new ArrayList<>();
+        InputStream inputStream = null;
+        PDDocument document = null;
+        try {
+            inputStream = file.getInputStream();
+            document = PDDocument.load(inputStream);
+            PDFTextStripper textStripper = new PDFTextStripper() {
+                @Override
+                protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
+                    if (text.contains(startText)) {
+                        int startIndex = 0;
+                        int endIndex = text.length();
+                        if (Strings.isNotBlank(startText)) {
+                            startIndex = text.indexOf(startText);
+                        }
+                        if (Strings.isNotBlank(endText)) {
+                            endIndex = text.indexOf(endText, startIndex + startText.length());
+                        }
+                        if (startIndex == -1) {
+                            startIndex = 0;
+                        }
+                        if (endIndex == -1) {
+                            endIndex = text.length();
+                        }
+                        result.add(text.substring(startIndex + startText.length(), endIndex).trim());
+                    }
+                }
+            };
+
+            textStripper.setSortByPosition(sort);
+            textStripper.setStartPage(1);
+            textStripper.setEndPage(document.getNumberOfPages());
+
+            textStripper.getText(document);
+
+            document.close();
+
+
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            if (document != null) {
+                try {
+                    document.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+            if (inputStream != null) {
+                try {
+                    inputStream.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+
+        }
+        if (result == null || result.size() == 0) {
+            result.add(" ");
+        }
+        return result;
+    }
+
+
+    public static List<String> getMultipleLineFields(MultipartFile file, String lineStartText, String lineEndText, String[] extraLines, boolean sort) {
         //获取文档坐标
         List<String> result = new ArrayList<>();
         final boolean[] startRecord = {false};
@@ -343,6 +414,7 @@ public class PDFTableReader<T> {
             PDFTextStripper textStripper = new PDFTextStripper() {
                 @Override
                 protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
+                    boolean isRemove = false;
                     if (text.contains(lineStartText)) {
                         startRecord[0] = true;
                     }
@@ -353,7 +425,14 @@ public class PDFTableReader<T> {
                         }
                     }
                     if (startRecord[0]) {
-                        result.add(text.trim());
+                        for(String line : extraLines) {
+                            if(text.contains(line)) {
+                                isRemove = true;
+                            }
+                        }
+                        if(!isRemove) {
+                            result.add(text.trim());
+                        }
                     }
                 }
             };
@@ -544,13 +623,8 @@ public class PDFTableReader<T> {
                             cell.setPdFont(textPositions.get(0).getFont());
                             rowCells.add(cell);
                             cellRow.setCell(rowCells);
-
-
                         }
-
                     }
-
-
                 } else {
                     cell.setPositions(textPositions);
                     cell.setText(String.valueOf(key).split("-@@@-")[0]);
@@ -608,16 +682,17 @@ public class PDFTableReader<T> {
 
         List<Float> dataLines = new ArrayList<>();
 
-        dataColumn = 0;
-        for(Float key : limitObject.keySet()) {
-            if(limitObject.get(key).size() > dataColumn) {
-                dataColumn = limitObject.get(key).size();
-            }
-        }
+        dataColumn = fields.size()/2 + 1;
+//        for(Float key : limitObject.keySet()) {
+//            if(limitObject.get(key).size() > dataColumn) {
+//                dataColumn = limitObject.get(key).size();
+//            }
+//        }
         for (Float key : limitObject.keySet()) {
             boolean isHeader = false;
             //去除标题行
 
+            //判断是否为数据行
             if (limitObject.get(key).size() >= dataColumn) {
                 for (PdfRow headerRow : headerRows) {
                     if (headerRow.getRowY() == key) {
@@ -702,6 +777,140 @@ public class PDFTableReader<T> {
         return result;
     }
 
+//    public static PdfTable getLinesByParsePdf(MultipartFile file, List<String> fields, List<String> ignoreFields, String endKey, int tableEndIndex, Map<String, List<PdfTextPosition>> documentPositions, String headerAlignment, String standardX, String headerYText, String[] extraLines, int headerLimit) {
+//        PdfTable pdfTable = new PdfTable();
+//        try {
+//            InputStream inputStream = file.getInputStream();
+//            PDDocument document = PDDocument.load(inputStream);
+//
+//            PDFTextStripper textStripper = new PDFTextStripper() {
+//                private int pageNumber = 0;
+//                float width = 0;
+//                float height = 0;
+//                float standardXPosition = 0;
+//                float rowY = 0;
+//
+//                @Override
+//                protected void writePage() throws IOException {
+//                    pageNumber++;
+//                    PDPage page = document.getPage(pageNumber - 1);
+//                    width = page.getMediaBox().getWidth();
+//                    height = page.getMediaBox().getHeight();
+//                    pdfTable.setWidth(width);
+//                    pdfTable.setHeight(height);
+//                    super.writePage();
+//                }
+//
+//                @Override
+//                protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
+//                    //设置左侧空白偏移量
+//                    if (text.contains(standardX)) {
+//                        pdfTable.setStandardPosition(textPositions.get(0).getX());
+//                    }
+//
+//                    //重置textPositions各字段高度,加上每页页码*高度
+//                    List<PdfTextPosition> positions = new ArrayList<>();
+//                    for (TextPosition textPosition : textPositions) {
+//                        PdfTextPosition position = new PdfTextPosition();
+//                        position.setX(textPosition.getX());
+//                        position.setY(textPosition.getY() + (pageNumber - 1) * height);
+//                        position.setPageNum(pageNumber);
+//                        position.setFont(textPosition.getFont());
+//                        position.setFontSize(textPosition.getFontSize());
+//                        position.setUnicode(textPosition.getUnicode());
+//                        positions.add(position);
+//                    }
+//                    //寻找表格中各字段隔绝行
+//                    if (extraLines != null) {
+//                        List<Float> extraLinesBorder = pdfTable.getExtraLinesBorder() == null ? new ArrayList<>() : pdfTable.getExtraLinesBorder();
+//                        for (String extra : extraLines) {
+//                            if (text.contains(extra)) {
+//                                extraLinesBorder.add(positions.get(0).getY());
+//                            }
+//                        }
+//                        pdfTable.setExtraLinesBorder(extraLinesBorder);
+//                    }
+//
+//                    documentPositions.put(text + "-@@@-" + (positions.get(0).getX() + positions.get(0).getY()), positions);
+//                    //设置行高
+//                    if (text.contains(headerYText)) {
+//                        rowY = positions.get(0).getY();
+//                        PdfRow pdfRow = headerRows.get(rowY) == null ? new PdfRow() : headerRows.get(rowY);
+//                        pdfRow.setRowY(rowY);
+//                        headerRows.put(rowY, pdfRow);
+//                    }
+//                    //获取header所在行
+//
+//                    PdfRow headerRow = null;
+//                    List<PdfCell> pdfCells = null;
+//                    for (String filed : fields) {
+//                        if (text.contains(filed)) {
+//                            headerRow = headerRows.get(positions.get(0).getY()) == null ? new PdfRow() : headerRows.get(positions.get(0).getY());
+//                            pdfCells = headerRow.getCell() == null ? new ArrayList<>() : headerRow.getCell();
+//                            PdfCell pdfCell = new PdfCell();
+//                            //文本识别可能出错,会带多个字符,需识别标题在此文本中的位置并重新赋起始结束x值
+//                            int firstPosition = text.indexOf(filed);
+//                            int lastPosition = firstPosition + filed.length() - 1;
+//                            pdfCell.setCellStartX(positions.get(0).getX());
+//                            pdfCell.setCellEndX(positions.get(positions.size() - 1).getX());
+//                            pdfCell.setCellY(positions.get(0).getY());
+//                            pdfCell.setPdFont(textPositions.get(0).getFont());
+//                            pdfCell.setText(filed);
+//                            pdfCell.setFontSize(textPositions.get(0).getFontSize());
+//                            pdfCell.setCellStartX(textPositions.get(firstPosition).getX());
+//                            pdfCell.setCellEndX(textPositions.get(lastPosition).getX());
+//                            pdfCells.add(pdfCell);
+//                            headerRow.setCell(pdfCells);
+//                            headerRows.put(positions.get(0).getY(), headerRow);
+//                        }
+//                    }
+//
+//
+//                    //添加忽略行
+//                    for (String ignoreField : ignoreFields) {
+//                        if (text.contains(ignoreField)) {
+//                            Map<Float, Boolean> ignoreRows = pdfTable.getIgnoreRows() == null ? new HashMap<>() : pdfTable.getIgnoreRows();
+//                            PdfRow pdfRow = new PdfRow();
+//                            pdfRow.setRowY(positions.get(0).getY());
+//                            ignoreRows.put(pdfRow.getRowY(), true);
+//                            pdfTable.setIgnoreRows(ignoreRows);
+//                        }
+//                    }
+//
+//
+//                    //提供了表格结束行数据就用,不用就按给的字段模糊匹
+//                    if (tableEndIndex == -1) {
+//                        if (Strings.isBlank(endKey)) {
+//                            pdfTable.setTableEndY(document.getNumberOfPages() * height);
+//                        } else if (text.contains(endKey)) {
+//                            pdfTable.setTableEndY(positions.get(0).getY());
+//                        }
+//                    } else {
+//                        pdfTable.setTableEndY(tableEndIndex);
+//                    }
+////                    if (headerRow != null && headerRow.getRowY() > 0) {
+////                        if (headerRow.getCell() != null && headerRow.getCell().size() > 0) {
+////                            pdfTable.setTableStartY(headerRow.getCell().get(0).getCellY());
+////                        }
+////                       // pdfTable.setHeaderRows(rows);
+////                    }
+//
+//                }
+//
+//
+//            };
+//
+//            textStripper.setSortByPosition(true);
+//            textStripper.setStartPage(1);
+//            textStripper.setEndPage(document.getNumberOfPages());
+//            textStripper.getText(document);
+//            document.close();
+//            inputStream.close();
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//        }
+//    }
+
     public static PdfTable getHeaderAndIgnoreLine(MultipartFile file, List<String> fields, List<String> ignoreFields, String endKey, int tableEndIndex, Map<String, List<PdfTextPosition>> documentPositions, String headerAlignment, String standardX, String headerYText, String[] extraLines, int headerLimit) {
         PdfTable pdfTable = new PdfTable();
 

+ 2 - 0
srm-module-code/src/main/java/org/jeecg/modules/saleCode/vo/PdfRow.java

@@ -11,9 +11,11 @@ import java.util.List;
 @AllArgsConstructor
 @NoArgsConstructor
 public class PdfRow {
+
     int rowNum;
     float rowX;
     float rowY;
+    String rowType;//行属性,
     PDFont pdFont;
     float height;
     List<PdfCell> cell;