diff --git a/README.md b/README.md index 08aae62..9f385c0 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,13 @@ $ npx ts-node html-template.ts หรือใช้ Rest Client ดูไฟล์ [api.http](./api.http) ตรง http header จะใช้ accept เป็นตัวบอกว่าต้องการผลเป็นไฟล์แบบไหนโดยใช้ [Mime type](https://developer.mozilla.org/en-US/docs/Web/HTTP/MIME_types/Common_types) เพื่อเป็นมาตรฐาน ให้ดูที่รองรับในฟังก์ชั่น [mimeToExtension](./libs/report-template.ts) +การตั้งค่าของ template อยู่ที่ [templateOption](./libs/report-template.ts) ### HTML -แปลงจาก URL เป็น pdf,png,jpg รองรับการตัดคำไทย ฟีเจอร์ template ยังไม่เสร็จ +แปลงจาก URL เป็น pdf,png,jpg รองรับการตัดคำไทย การแปลงปรับแต่งได้ด้วยตัวเลือกต่อไปนี้ +- [templateOption.htmlOption](./libs/report-template.ts) (interface htmlTemplateOption) เป็นการตั้งค่าเฉพาะของโมดุลนี้ +- templateOption.htmlOption.pdfOption ซึ่งเป็น [PDFOptions](https://pptr.dev/api/puppeteer.pdfoptions) ของ puppeteer +ฟีเจอร์ template ยังไม่เสร็จ ``` sh # Grafana dashboard to pdf curl -X 'POST' \ @@ -80,6 +84,7 @@ curl -X 'POST' \ } }' -o html-grafana.pdf + # url to png curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \ -H 'accept: image/png' -H 'Content-Type: application/json' \ @@ -88,8 +93,31 @@ curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \ curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \ -H 'accept: image/jpeg' -H 'Content-Type: application/json' \ -d '{"template": "https://pantip.com/","reportName": "html-blognone"}' -o html-pantip.jpeg +# url to pdf: pantip.com is a very long lazy-loaded page, so tune the preload options +curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \ + -H 'accept: application/pdf' -H 'Content-Type: application/json' \ + -d '{"template": "https://pantip.com/","reportName": "html-blognone", + "htmlOption": { + "preloadWait":500, + "preloadScroll":1000, + "preloadLoop":25, + "pdfOption":{"format":"A4"} + } + }' -o html-pantip.pdf + + ``` +# pantip.com to pdf +curl -X 'POST' \ + 'http://localhost:3001/api/v1/report-template/html' \ + -H 
'accept: application/pdf' -H 'Content-Type: application/json' \ + -d '{ + "template": "https://pantip.com", + "reportName": "html-pantip.com" +}' -o html-pantip.com.pdf + + ### docx แปลงจากเทมเพลทไฟล์ .docx เป็น docx,pdf,png ```sh diff --git a/api.http b/api.http index ce60fcb..b247ad9 100644 --- a/api.http +++ b/api.http @@ -109,7 +109,7 @@ Accept: application/pdf } } -### convert blognone to pdf +### convert blognone to png POST {{api_host}}/html Content-Type: application/json Accept: image/png @@ -118,3 +118,13 @@ Accept: image/png "template": "https://www.blognone.com", "reportName": "html-blognone" } + +### convert pantip.com to pdf +POST {{api_host}}/html +Content-Type: application/json +Accept: application/pdf + +{ + "template": "https://pantip.com", + "reportName": "html-pantip.com" +} diff --git a/libs/html-templates-lib.ts b/libs/html-templates-lib.ts index 43a3fdd..869b27c 100644 --- a/libs/html-templates-lib.ts +++ b/libs/html-templates-lib.ts @@ -3,15 +3,18 @@ export const htmlTemplateRoute = express.Router() import { mimeToExtension, templateOption } from "./report-template" // import fs from "fs" //import { chromium } from 'playwright' -import puppeteer,{PDFOptions} from 'puppeteer' +import puppeteer, {PDFOptions} from 'puppeteer' import Handlebars from 'handlebars' -import e from "express" + //import { createReport } from "docx-templates" // แก้ package.json ของ LibreOfficeFileConverter // https://github.com/microsoft/TypeScript/issues/52363#issuecomment-1659179354 //import { LibreOfficeFileConverter } from "libreoffice-file-converter" const TEMPLATE_FOLDER_NAME = "templates/html" -const width_px = 1200; //TODO read from htmlOption + +function wait(ms: number) { + return new Promise(resolve => setTimeout(resolve, ms)); +} /** * docxTemplate Uses docx-template to convert input data and template to output buffer. 
@@ -19,98 +22,90 @@ const width_px = 1200; //TODO read from htmlOption * You have to handle exception throw by function * handlebars template support only content from Buffer * @param {Buffer|String} t template in buffer format or url to web page - * @param {templateOption} tdata Template Information in JSON format + * @param {templateOption} templOpt Template Information in JSON format * @param {String} outputMediaType output extension, support pdf, jpeg, png * @return {Promise} output buffer after apply template. */ -export async function htmlTemplateX(t: Buffer | String, tdata: templateOption, outputMediaType: string = "pdf"): Promise { +export async function htmlTemplateX(t: Buffer | String, templOpt: templateOption, outputMediaType: string = "pdf"): Promise { try { if (!["pdf", "jpeg", "png"].find((e) => e === outputMediaType)) { throw "FormatError" } - const browser = await puppeteer.launch({ headless: true,args: ['--no-sandbox'] }); + const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] }); const page = await browser.newPage(); - page.setDefaultNavigationTimeout(120000); - await page.setViewport({ - width: width_px, - height: 800, - deviceScaleFactor: 2, - isMobile: false - }); + if(templOpt.htmlOption?.navigationTimeout) + page.setDefaultNavigationTimeout(120000); + if (typeof t === 'string') { - await page.goto(t, { waitUntil: 'networkidle0' }); + + switch(templOpt.htmlOption?.waitUntil){ + case 'networkidle0': await page.goto(t, { waitUntil: 'networkidle0' }) + break + case 'networkidle2': await page.goto(t, { waitUntil: 'networkidle2' }) + break + default: await page.goto(t) + } } else { - if (tdata.data) { + if (templOpt.data) { const template = Handlebars.compile(t.toString()); - const html = template(tdata.data); + const html = template(templOpt.data); await page.setContent(html); } else { await page.setContent(t.toString()) } } + const totalHeight = await page.evaluate(async (_templOpt) => { + //force scroll for lazy load 
page + const _scroll = _templOpt.htmlOption?.preloadScroll??1000 + const _wait = _templOpt.htmlOption?.preloadWait??400 + const _loop =_templOpt.htmlOption?.preloadLoop??4 - -/* - // try to load whole page - let x = await page.evaluate(async (tdata) => { - const scrollableSection = - (tdata.htmlOption?.querySelector && document.querySelector(tdata.htmlOption.querySelector)) ? - document.querySelector(tdata.htmlOption.querySelector) : document.body - if (scrollableSection) { - const childElement = scrollableSection.firstElementChild; - let scrollPosition = 0; - let viewportHeight = window.innerHeight; - if (childElement) - while (scrollPosition < childElement.scrollHeight) { - scrollableSection.scrollBy(0, viewportHeight); - await new Promise(resolve => setTimeout(resolve, 500)); - scrollPosition += viewportHeight; - } - return scrollPosition + for (let i = 0; i < _loop; i++) { + window.scrollBy(0, _scroll); + await new Promise(resolve => setTimeout(resolve, _wait)); } - return 0 - }, tdata); - //console.log("scrollPosition=" + x) + //extra wait + await new Promise(resolve => setTimeout(resolve, _wait*2)); -*/ - //find real page height - const totalHeight = await page.evaluate(async (tdata) => { let scrollableSection = - (tdata.htmlOption?.querySelector && - document.querySelector(tdata.htmlOption.querySelector) && document.querySelector(tdata.htmlOption.querySelector)) ? - document.querySelector(tdata.htmlOption.querySelector) : null + (_templOpt.htmlOption?.querySelector && + document.querySelector(_templOpt.htmlOption.querySelector)) ? + document.querySelector(_templOpt.htmlOption.querySelector) : null - const childElement = scrollableSection? scrollableSection: document.body - if(scrollableSection ==null) + const childElement = scrollableSection ? 
scrollableSection : document.body + + if (scrollableSection == null) scrollableSection = document.body - //const childElement = scrollableSection.firstElementChild; - let scrollPosition = 0; - let viewportHeight = window.innerHeight; - while (scrollPosition < childElement.scrollHeight) { - scrollableSection.scrollBy(0, viewportHeight); - await new Promise(resolve => setTimeout(resolve, 500)); - scrollPosition += viewportHeight; - } - return childElement.scrollHeight - }, tdata); + //const childElement = scrollableSection.firstElementChild; + let scrollPosition = 0; + let viewportHeight = window.innerHeight; + while (scrollPosition < childElement.scrollHeight) { + scrollableSection.scrollBy(0, viewportHeight); + await new Promise(resolve => setTimeout(resolve, 500)); + scrollPosition += viewportHeight; + } + //return scrollPosition + return childElement.scrollHeight + }, templOpt); + console.log("totalHeight") if (!totalHeight) { throw new Error(`Unable to determine the page height ${totalHeight}. 
The selector may not correct or no body tag`); } else { console.log("Page height adjusted to:", totalHeight); } - console.log("set viewport ") await page.setViewport({ - width: width_px, + width: Number(templOpt.htmlOption?.pdfOption?.width??1200), height: totalHeight, deviceScaleFactor: 2, isMobile: false }); + ///// output to photo end here if (outputMediaType === "png" || outputMediaType === "jpeg") { - const photoBuffer = await page.screenshot({ + const photoBuffer = await page.screenshot({ // path: 'url_pup.png', fullPage: true, type: outputMediaType // 'webp' @@ -120,16 +115,21 @@ export async function htmlTemplateX(t: Buffer | String, tdata: templateOption, o } ///// output to PDF //TODO overide option from htmlTemplateOption - let pdfOption:PDFOptions = { + let o:PDFOptions ={ // path: './url_prop.pdf', // format:"A4", - width: width_px, + width: 1200, height: totalHeight, printBackground: true, scale: 1, displayHeaderFooter: false, margin: { top: 5, right: 5, bottom: 5, left: 5 } } + //Murge with default + if(templOpt.htmlOption?.pdfOption) + o = {...o,...templOpt.htmlOption.pdfOption} + const {path,...pdfOption} = o //remove path if exists + console.log("pdfOption:",pdfOption) const buffer = await page.pdf(pdfOption); await browser.close(); return buffer @@ -189,12 +189,12 @@ htmlTemplateRoute.post("/", async function (req, res) { res.setHeader("Content-Length", buffer.length) res.end(buffer) } catch (ex) { - if(ex instanceof SyntaxError){ + if (ex instanceof SyntaxError) { res.statusCode = 400 res.statusMessage = ex.message res.end(res.statusMessage) console.error("report-template/html: ", ex) - }else{ + } else { res.statusCode = 500 res.statusMessage = "Internal Server Error during POST report-template/html" res.end(res.statusMessage) diff --git a/libs/report-template.ts b/libs/report-template.ts index 58e3e26..ed5339f 100644 --- a/libs/report-template.ts +++ b/libs/report-template.ts @@ -1,4 +1,4 @@ - +import { PDFOptions } from 'puppeteer' /** * @prop 
{string} template template ID * @prop {string} reportName outputname @@ -12,14 +12,22 @@ export interface templateOption { data: object } /** - * @prop {string} querySelector template ID - * @prop {object} pdfOption outputname - * @prop {number} width support only html-template + * @prop {number} navigationTimeout page.setDefaultNavigationTimeout(navigationTimeout) + * @prop {string} querySelector CSS selector of the page element to scroll and measure (falls back to document.body) + * @prop {string} waitUntil 'networkidle0' or 'networkidle2' + * @prop {number} preloadWait wait time inside the preload scroll loop, in milliseconds + * @prop {number} preloadScroll distance scrolled on each preload step + * @prop {number} preloadLoop number of scroll steps used to preload a lazy-loaded page + * @prop {PDFOptions} pdfOption PdfOptions of Puppeteer */ export interface htmlTemplateOption { - querySelector: string - pdfOption?: object - width?:number + navigationTimeout?:number + querySelector?: string + waitUntil?:string + preloadWait?:number + preloadScroll?:number + preloadLoop?:number + pdfOption?: PDFOptions }