support puppeteer's PDFOptions, add param for preload

This commit is contained in:
oom 2025-02-28 10:31:11 +07:00
parent f6b68e4379
commit c33e0653af
4 changed files with 117 additions and 71 deletions

View file

@ -64,9 +64,13 @@ $ npx ts-node html-template.ts
หรือใช้ Rest Client ดูไฟล์ [api.http](./api.http) หรือใช้ Rest Client ดูไฟล์ [api.http](./api.http)
ตรง http header จะใช้ accept เป็นตัวบอกว่าต้องการผลเป็นไฟล์แบบไหนโดยใช้ [Mime type](https://developer.mozilla.org/en-US/docs/Web/HTTP/MIME_types/Common_types) เพื่อเป็นมาตรฐาน ให้ดูที่รองรับในฟังก์ชั่น [mimeToExtension](./libs/report-template.ts) ตรง http header จะใช้ accept เป็นตัวบอกว่าต้องการผลเป็นไฟล์แบบไหนโดยใช้ [Mime type](https://developer.mozilla.org/en-US/docs/Web/HTTP/MIME_types/Common_types) เพื่อเป็นมาตรฐาน ให้ดูที่รองรับในฟังก์ชั่น [mimeToExtension](./libs/report-template.ts)
การตั้งค่าของ template อยู่ที่ [templateOption](./libs/report-template.ts)
### HTML ### HTML
แปลงจาก URL เป็น pdf,png,jpg รองรับการตัดคำไทย ฟีเจอร์ template ยังไม่เสร็จ แปลงจาก URL เป็น pdf,png,jpg รองรับการตัดคำไทย การแปลงเป็น
- [templateOption.htmlOption](./libs/report-template.ts) (ชนิด htmlTemplateOption) เป็นการตั้งค่าเฉพาะของโมดุลนี้
- templateOption.htmlOption.pdfOption ซึ่งเป็น [PDFOptions](https://pptr.dev/api/puppeteer.pdfoptions) ของ puppeteer
ฟีเจอร์ template ยังไม่เสร็จ
``` sh ``` sh
# Grafana dashboard to pdf # Grafana dashboard to pdf
curl -X 'POST' \ curl -X 'POST' \
@ -80,6 +84,7 @@ curl -X 'POST' \
} }
}' -o html-grafana.pdf }' -o html-grafana.pdf
# url to png # url to png
curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \ curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: image/png' -H 'Content-Type: application/json' \ -H 'accept: image/png' -H 'Content-Type: application/json' \
@ -88,8 +93,31 @@ curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \ curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: image/jpeg' -H 'Content-Type: application/json' \ -H 'accept: image/jpeg' -H 'Content-Type: application/json' \
-d '{"template": "https://pantip.com/","reportName": "html-blognone"}' -o html-pantip.jpeg -d '{"template": "https://pantip.com/","reportName": "html-blognone"}' -o html-pantip.jpeg
# url to pdf: pantip.com is a very long lazy-loaded page, so tune the preload options
curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: application/pdf' -H 'Content-Type: application/json' \
-d '{"template": "https://pantip.com/","reportName": "html-blognone",
"htmlOption": {
"preloadWait":500,
"preloadScroll":1000,
"preloadLoop":25,
"pdfOption":{"format":"A4"}
}
}' -o html-pantip.pdf
``` ```
``` sh
# pantip.com to pdf
curl -X 'POST' \
'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: application/pdf' -H 'Content-Type: application/json' \
-d '{
"template": "https://pantip.com",
"reportName": "html-pantip.com"
}' -o html-pantip.com.pdf
```
### docx ### docx
แปลงจากเทมเพลทไฟล์ .docx เป็น docx,pdf,png แปลงจากเทมเพลทไฟล์ .docx เป็น docx,pdf,png
```sh ```sh

View file

@ -109,7 +109,7 @@ Accept: application/pdf
} }
} }
### convert blognone to pdf ### convert blognone to png
POST {{api_host}}/html POST {{api_host}}/html
Content-Type: application/json Content-Type: application/json
Accept: image/png Accept: image/png
@ -118,3 +118,13 @@ Accept: image/png
"template": "https://www.blognone.com", "template": "https://www.blognone.com",
"reportName": "html-blognone" "reportName": "html-blognone"
} }
### convert pantip to pdf
POST {{api_host}}/html
Content-Type: application/json
Accept: application/pdf
{
"template": "https://pantip.com",
"reportName": "html-pantip.com"
}

View file

@ -3,15 +3,18 @@ export const htmlTemplateRoute = express.Router()
import { mimeToExtension, templateOption } from "./report-template" import { mimeToExtension, templateOption } from "./report-template"
// import fs from "fs" // import fs from "fs"
//import { chromium } from 'playwright' //import { chromium } from 'playwright'
import puppeteer,{PDFOptions} from 'puppeteer' import puppeteer, {PDFOptions} from 'puppeteer'
import Handlebars from 'handlebars' import Handlebars from 'handlebars'
import e from "express"
//import { createReport } from "docx-templates" //import { createReport } from "docx-templates"
// แก้ package.json ของ LibreOfficeFileConverter // แก้ package.json ของ LibreOfficeFileConverter
// https://github.com/microsoft/TypeScript/issues/52363#issuecomment-1659179354 // https://github.com/microsoft/TypeScript/issues/52363#issuecomment-1659179354
//import { LibreOfficeFileConverter } from "libreoffice-file-converter" //import { LibreOfficeFileConverter } from "libreoffice-file-converter"
const TEMPLATE_FOLDER_NAME = "templates/html" const TEMPLATE_FOLDER_NAME = "templates/html"
const width_px = 1200; //TODO read from htmlOption
/**
 * Pause for a given duration.
 *
 * @param ms delay in milliseconds before the returned promise settles
 * @returns a promise that resolves (with no value) once the timer fires
 */
function wait(ms: number) {
  return new Promise(function (done) {
    setTimeout(done, ms);
  });
}
/** /**
* htmlTemplateX uses puppeteer (and Handlebars for buffer templates) to convert a URL or HTML template plus input data to an output buffer.
@ -19,98 +22,90 @@ const width_px = 1200; //TODO read from htmlOption
* You have to handle exception throw by function * You have to handle exception throw by function
* handlebars template support only content from Buffer * handlebars template support only content from Buffer
* @param {Buffer|String} t template in buffer format or url to web page * @param {Buffer|String} t template in buffer format or url to web page
* @param {templateOption} tdata Template Information in JSON format * @param {templateOption} templOpt Template Information in JSON format
* @param {String} outputMediaType output extension, support pdf, jpeg, png * @param {String} outputMediaType output extension, support pdf, jpeg, png
* @return {Promise<Uint8Array>} output buffer after apply template. * @return {Promise<Uint8Array>} output buffer after apply template.
*/ */
export async function htmlTemplateX(t: Buffer | String, tdata: templateOption, outputMediaType: string = "pdf"): Promise<Uint8Array> { export async function htmlTemplateX(t: Buffer | String, templOpt: templateOption, outputMediaType: string = "pdf"): Promise<Uint8Array> {
try { try {
if (!["pdf", "jpeg", "png"].find((e) => e === outputMediaType)) { if (!["pdf", "jpeg", "png"].find((e) => e === outputMediaType)) {
throw "FormatError" throw "FormatError"
} }
const browser = await puppeteer.launch({ headless: true,args: ['--no-sandbox'] }); const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
const page = await browser.newPage(); const page = await browser.newPage();
page.setDefaultNavigationTimeout(120000); if(templOpt.htmlOption?.navigationTimeout)
await page.setViewport({ page.setDefaultNavigationTimeout(120000);
width: width_px,
height: 800,
deviceScaleFactor: 2,
isMobile: false
});
if (typeof t === 'string') { if (typeof t === 'string') {
await page.goto(t, { waitUntil: 'networkidle0' });
switch(templOpt.htmlOption?.waitUntil){
case 'networkidle0': await page.goto(t, { waitUntil: 'networkidle0' })
break
case 'networkidle2': await page.goto(t, { waitUntil: 'networkidle2' })
break
default: await page.goto(t)
}
} else { } else {
if (tdata.data) { if (templOpt.data) {
const template = Handlebars.compile(t.toString()); const template = Handlebars.compile(t.toString());
const html = template(tdata.data); const html = template(templOpt.data);
await page.setContent(html); await page.setContent(html);
} else { } else {
await page.setContent(t.toString()) await page.setContent(t.toString())
} }
} }
const totalHeight = await page.evaluate(async (_templOpt) => {
//force scroll for lazy load page
const _scroll = _templOpt.htmlOption?.preloadScroll??1000
const _wait = _templOpt.htmlOption?.preloadWait??400
const _loop =_templOpt.htmlOption?.preloadLoop??4
for (let i = 0; i < _loop; i++) {
/* window.scrollBy(0, _scroll);
// try to load whole page await new Promise(resolve => setTimeout(resolve, _wait));
let x = await page.evaluate(async (tdata) => {
const scrollableSection =
(tdata.htmlOption?.querySelector && document.querySelector(tdata.htmlOption.querySelector)) ?
document.querySelector(tdata.htmlOption.querySelector) : document.body
if (scrollableSection) {
const childElement = scrollableSection.firstElementChild;
let scrollPosition = 0;
let viewportHeight = window.innerHeight;
if (childElement)
while (scrollPosition < childElement.scrollHeight) {
scrollableSection.scrollBy(0, viewportHeight);
await new Promise(resolve => setTimeout(resolve, 500));
scrollPosition += viewportHeight;
}
return scrollPosition
} }
return 0 //extra wait
}, tdata); await new Promise(resolve => setTimeout(resolve, _wait*2));
//console.log("scrollPosition=" + x)
*/
//find real page height
const totalHeight = await page.evaluate(async (tdata) => {
let scrollableSection = let scrollableSection =
(tdata.htmlOption?.querySelector && (_templOpt.htmlOption?.querySelector &&
document.querySelector(tdata.htmlOption.querySelector) && document.querySelector(tdata.htmlOption.querySelector)) ? document.querySelector(_templOpt.htmlOption.querySelector)) ?
document.querySelector(tdata.htmlOption.querySelector) : null document.querySelector(_templOpt.htmlOption.querySelector) : null
const childElement = scrollableSection? scrollableSection: document.body const childElement = scrollableSection ? scrollableSection : document.body
if(scrollableSection ==null)
if (scrollableSection == null)
scrollableSection = document.body scrollableSection = document.body
//const childElement = scrollableSection.firstElementChild; //const childElement = scrollableSection.firstElementChild;
let scrollPosition = 0; let scrollPosition = 0;
let viewportHeight = window.innerHeight; let viewportHeight = window.innerHeight;
while (scrollPosition < childElement.scrollHeight) { while (scrollPosition < childElement.scrollHeight) {
scrollableSection.scrollBy(0, viewportHeight); scrollableSection.scrollBy(0, viewportHeight);
await new Promise(resolve => setTimeout(resolve, 500)); await new Promise(resolve => setTimeout(resolve, 500));
scrollPosition += viewportHeight; scrollPosition += viewportHeight;
} }
return childElement.scrollHeight //return scrollPosition
}, tdata); return childElement.scrollHeight
}, templOpt);
console.log("totalHeight")
if (!totalHeight) { if (!totalHeight) {
throw new Error(`Unable to determine the page height ${totalHeight}. The selector may not correct or no body tag`); throw new Error(`Unable to determine the page height ${totalHeight}. The selector may not correct or no body tag`);
} else { } else {
console.log("Page height adjusted to:", totalHeight); console.log("Page height adjusted to:", totalHeight);
} }
console.log("set viewport ")
await page.setViewport({ await page.setViewport({
width: width_px, width: Number(templOpt.htmlOption?.pdfOption?.width??1200),
height: totalHeight, height: totalHeight,
deviceScaleFactor: 2, deviceScaleFactor: 2,
isMobile: false isMobile: false
}); });
///// output to photo end here ///// output to photo end here
if (outputMediaType === "png" || outputMediaType === "jpeg") { if (outputMediaType === "png" || outputMediaType === "jpeg") {
const photoBuffer = await page.screenshot({ const photoBuffer = await page.screenshot({
// path: 'url_pup.png', // path: 'url_pup.png',
fullPage: true, fullPage: true,
type: outputMediaType // 'webp' type: outputMediaType // 'webp'
@ -120,16 +115,21 @@ export async function htmlTemplateX(t: Buffer | String, tdata: templateOption, o
} }
///// output to PDF ///// output to PDF
//TODO overide option from htmlTemplateOption //TODO overide option from htmlTemplateOption
let pdfOption:PDFOptions = { let o:PDFOptions ={
// path: './url_prop.pdf', // path: './url_prop.pdf',
// format:"A4", // format:"A4",
width: width_px, width: 1200,
height: totalHeight, height: totalHeight,
printBackground: true, printBackground: true,
scale: 1, scale: 1,
displayHeaderFooter: false, displayHeaderFooter: false,
margin: { top: 5, right: 5, bottom: 5, left: 5 } margin: { top: 5, right: 5, bottom: 5, left: 5 }
} }
//Merge with default
if(templOpt.htmlOption?.pdfOption)
o = {...o,...templOpt.htmlOption.pdfOption}
const {path,...pdfOption} = o //remove path if exists
console.log("pdfOption:",pdfOption)
const buffer = await page.pdf(pdfOption); const buffer = await page.pdf(pdfOption);
await browser.close(); await browser.close();
return buffer return buffer
@ -189,12 +189,12 @@ htmlTemplateRoute.post("/", async function (req, res) {
res.setHeader("Content-Length", buffer.length) res.setHeader("Content-Length", buffer.length)
res.end(buffer) res.end(buffer)
} catch (ex) { } catch (ex) {
if(ex instanceof SyntaxError){ if (ex instanceof SyntaxError) {
res.statusCode = 400 res.statusCode = 400
res.statusMessage = ex.message res.statusMessage = ex.message
res.end(res.statusMessage) res.end(res.statusMessage)
console.error("report-template/html: ", ex) console.error("report-template/html: ", ex)
}else{ } else {
res.statusCode = 500 res.statusCode = 500
res.statusMessage = "Internal Server Error during POST report-template/html" res.statusMessage = "Internal Server Error during POST report-template/html"
res.end(res.statusMessage) res.end(res.statusMessage)

View file

@ -1,4 +1,4 @@
import { PDFOptions } from 'puppeteer'
/** /**
* @prop {string} template template ID * @prop {string} template template ID
* @prop {string} reportName outputname * @prop {string} reportName outputname
@ -12,14 +12,22 @@ export interface templateOption {
data: object data: object
} }
/** /**
* @prop {string} querySelector template ID * @prop {number} navigationTimeout page.setDefaultNavigationTimeout(navigationTimeout)
* @prop {object} pdfOption outputname * @prop {string} querySelector Element of page
* @prop {number} width support only html-template * @prop {string} waitUntil 'networkidle0' or 'networkidle2'
* @prop {number} preloadWait wait time after each scroll step in the preload loop, in milliseconds
* @prop {number} preloadScroll scroll distance in pixels for each preload step
* @prop {number} preloadLoop number of scroll steps used to preload a lazy-loaded page
* @prop {PDFOptions} pdfOption PdfOptions of Puppeteer
*/ */
export interface htmlTemplateOption { export interface htmlTemplateOption {
querySelector: string navigationTimeout?:number
pdfOption?: object querySelector?: string
width?:number waitUntil?:string
preloadWait?:number
preloadScroll?:number
preloadLoop?:number
pdfOption?: PDFOptions
} }