support puppeteer's PDFOptions, add param for preload

This commit is contained in:
oom 2025-02-28 10:31:11 +07:00
parent f6b68e4379
commit c33e0653af
4 changed files with 117 additions and 71 deletions

View file

@ -64,9 +64,13 @@ $ npx ts-node html-template.ts
หรือใช้ Rest Client ดูไฟล์ [api.http](./api.http)
ตรง http header จะใช้ accept เป็นตัวบอกว่าต้องการผลเป็นไฟล์แบบไหนโดยใช้ [Mime type](https://developer.mozilla.org/en-US/docs/Web/HTTP/MIME_types/Common_types) เพื่อเป็นมาตรฐาน ให้ดูที่รองรับในฟังก์ชั่น [mimeToExtension](./libs/report-template.ts)
การตั้งค่าของ template อยู่ที่ [templateOption](./libs/report-template.ts)
### HTML
แปลงจาก URL เป็น pdf,png,jpg รองรับการตัดคำไทย ฟีเจอร์ template ยังไม่เสร็จ
แปลงจาก URL เป็น pdf,png,jpg รองรับการตัดคำไทย การแปลงเป็น
- [templateOption.htmlOption](./libs/report-template.ts) (ชนิด htmlTemplateOption) เป็นการตั้งค่าเฉพาะของโมดุลนี้
- templateOption.htmlOption.pdfOption ซึ่งเป็น [PDFOptions](https://pptr.dev/api/puppeteer.pdfoptions) ของ puppeteer
ฟีเจอร์ template ยังไม่เสร็จ
``` sh
# Grafana dashboard to pdf
curl -X 'POST' \
@ -80,6 +84,7 @@ curl -X 'POST' \
}
}' -o html-grafana.pdf
# url to png
curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: image/png' -H 'Content-Type: application/json' \
@ -88,8 +93,31 @@ curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: image/jpeg' -H 'Content-Type: application/json' \
-d '{"template": "https://pantip.com/","reportName": "html-blognone"}' -o html-pantip.jpeg
# url to pdf: pantip.com is a very long lazy-loaded page, so scroll to preload it first
curl -X 'POST' 'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: application/pdf' -H 'Content-Type: application/json' \
-d '{"template": "https://pantip.com/","reportName": "html-blognone",
"htmlOption": {
"preloadWait":500,
"preloadScroll":1000,
"preloadLoop":25,
"pdfOption":{"format":"A4"}
}
}' -o html-pantip.pdf
```
# pantip.com to pdf
curl -X 'POST' \
'http://localhost:3001/api/v1/report-template/html' \
-H 'accept: application/pdf' -H 'Content-Type: application/json' \
-d '{
"template": "https://pantip.com",
"reportName": "html-pantip.com"
}' -o html-pantip.com.pdf
### docx
แปลงจากเทมเพลทไฟล์ .docx เป็น docx,pdf,png
```sh

View file

@ -109,7 +109,7 @@ Accept: application/pdf
}
}
### convert blognone to pdf
### convert blognone to png
POST {{api_host}}/html
Content-Type: application/json
Accept: image/png
@ -118,3 +118,13 @@ Accept: image/png
"template": "https://www.blognone.com",
"reportName": "html-blognone"
}
### convert pantip to pdf
POST {{api_host}}/html
Content-Type: application/json
Accept: application/pdf
{
"template": "https://pantip.com",
"reportName": "html-pantip.com"
}

View file

@ -5,13 +5,16 @@ import { mimeToExtension, templateOption } from "./report-template"
//import { chromium } from 'playwright'
import puppeteer, {PDFOptions} from 'puppeteer'
import Handlebars from 'handlebars'
import e from "express"
//import { createReport } from "docx-templates"
// แก้ package.json ของ LibreOfficeFileConverter
// https://github.com/microsoft/TypeScript/issues/52363#issuecomment-1659179354
//import { LibreOfficeFileConverter } from "libreoffice-file-converter"
// Relative folder name "templates/html"; presumably where HTML templates are stored
// (not referenced in the visible code — confirm against the rest of the file).
const TEMPLATE_FOLDER_NAME = "templates/html"
// Viewport width passed to page.setViewport before the page is loaded.
const width_px = 1200; //TODO read from htmlOption
/**
 * Pause helper: returns a promise that settles once the given delay has elapsed.
 * @param ms delay in milliseconds, handed straight to setTimeout
 * @returns a promise that resolves (with no value) after the timer fires
 */
function wait(ms: number) {
  return new Promise(done => {
    // The timer invokes the resolver without arguments when it fires.
    setTimeout(done, ms);
  });
}
/**
* docxTemplate Uses docx-template to convert input data and template to output buffer.
@ -19,68 +22,58 @@ const width_px = 1200; //TODO read from htmlOption
* You have to handle exception throw by function
* handlebars template support only content from Buffer
* @param {Buffer|String} t template in buffer format or url to web page
* @param {templateOption} tdata Template Information in JSON format
* @param {templateOption} templOpt Template Information in JSON format
* @param {String} outputMediaType output extension, support pdf, jpeg, png
* @return {Promise<Uint8Array>} output buffer after apply template.
*/
export async function htmlTemplateX(t: Buffer | String, tdata: templateOption, outputMediaType: string = "pdf"): Promise<Uint8Array> {
export async function htmlTemplateX(t: Buffer | String, templOpt: templateOption, outputMediaType: string = "pdf"): Promise<Uint8Array> {
try {
if (!["pdf", "jpeg", "png"].find((e) => e === outputMediaType)) {
throw "FormatError"
}
const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
const page = await browser.newPage();
if(templOpt.htmlOption?.navigationTimeout)
page.setDefaultNavigationTimeout(120000);
await page.setViewport({
width: width_px,
height: 800,
deviceScaleFactor: 2,
isMobile: false
});
if (typeof t === 'string') {
await page.goto(t, { waitUntil: 'networkidle0' });
switch(templOpt.htmlOption?.waitUntil){
case 'networkidle0': await page.goto(t, { waitUntil: 'networkidle0' })
break
case 'networkidle2': await page.goto(t, { waitUntil: 'networkidle2' })
break
default: await page.goto(t)
}
} else {
if (tdata.data) {
if (templOpt.data) {
const template = Handlebars.compile(t.toString());
const html = template(tdata.data);
const html = template(templOpt.data);
await page.setContent(html);
} else {
await page.setContent(t.toString())
}
}
const totalHeight = await page.evaluate(async (_templOpt) => {
//force scroll for lazy load page
const _scroll = _templOpt.htmlOption?.preloadScroll??1000
const _wait = _templOpt.htmlOption?.preloadWait??400
const _loop =_templOpt.htmlOption?.preloadLoop??4
/*
// try to load whole page
let x = await page.evaluate(async (tdata) => {
const scrollableSection =
(tdata.htmlOption?.querySelector && document.querySelector(tdata.htmlOption.querySelector)) ?
document.querySelector(tdata.htmlOption.querySelector) : document.body
if (scrollableSection) {
const childElement = scrollableSection.firstElementChild;
let scrollPosition = 0;
let viewportHeight = window.innerHeight;
if (childElement)
while (scrollPosition < childElement.scrollHeight) {
scrollableSection.scrollBy(0, viewportHeight);
await new Promise(resolve => setTimeout(resolve, 500));
scrollPosition += viewportHeight;
for (let i = 0; i < _loop; i++) {
window.scrollBy(0, _scroll);
await new Promise(resolve => setTimeout(resolve, _wait));
}
return scrollPosition
}
return 0
}, tdata);
//console.log("scrollPosition=" + x)
//extra wait
await new Promise(resolve => setTimeout(resolve, _wait*2));
*/
//find real page height
const totalHeight = await page.evaluate(async (tdata) => {
let scrollableSection =
(tdata.htmlOption?.querySelector &&
document.querySelector(tdata.htmlOption.querySelector) && document.querySelector(tdata.htmlOption.querySelector)) ?
document.querySelector(tdata.htmlOption.querySelector) : null
(_templOpt.htmlOption?.querySelector &&
document.querySelector(_templOpt.htmlOption.querySelector)) ?
document.querySelector(_templOpt.htmlOption.querySelector) : null
const childElement = scrollableSection ? scrollableSection : document.body
if (scrollableSection == null)
scrollableSection = document.body
//const childElement = scrollableSection.firstElementChild;
@ -91,23 +84,25 @@ export async function htmlTemplateX(t: Buffer | String, tdata: templateOption, o
await new Promise(resolve => setTimeout(resolve, 500));
scrollPosition += viewportHeight;
}
//return scrollPosition
return childElement.scrollHeight
}, tdata);
}, templOpt);
console.log("totalHeight")
if (!totalHeight) {
throw new Error(`Unable to determine the page height ${totalHeight}. The selector may not correct or no body tag`);
} else {
console.log("Page height adjusted to:", totalHeight);
}
console.log("set viewport ")
await page.setViewport({
width: width_px,
width: Number(templOpt.htmlOption?.pdfOption?.width??1200),
height: totalHeight,
deviceScaleFactor: 2,
isMobile: false
});
///// output to photo end here
if (outputMediaType === "png" || outputMediaType === "jpeg") {
const photoBuffer = await page.screenshot({
@ -120,16 +115,21 @@ export async function htmlTemplateX(t: Buffer | String, tdata: templateOption, o
}
///// output to PDF
//TODO overide option from htmlTemplateOption
let pdfOption:PDFOptions = {
let o:PDFOptions ={
// path: './url_prop.pdf',
// format:"A4",
width: width_px,
width: 1200,
height: totalHeight,
printBackground: true,
scale: 1,
displayHeaderFooter: false,
margin: { top: 5, right: 5, bottom: 5, left: 5 }
}
//Merge with default
if(templOpt.htmlOption?.pdfOption)
o = {...o,...templOpt.htmlOption.pdfOption}
const {path,...pdfOption} = o //remove path if exists
console.log("pdfOption:",pdfOption)
const buffer = await page.pdf(pdfOption);
await browser.close();
return buffer

View file

@ -1,4 +1,4 @@
import { PDFOptions } from 'puppeteer'
/**
* @prop {string} template template ID
* @prop {string} reportName outputname
@ -12,14 +12,22 @@ export interface templateOption {
data: object
}
/**
* Options specific to the HTML template module (URL or HTML to pdf/png/jpeg).
*
* NOTE(review): `querySelector` and `pdfOption` are each declared twice below,
* which looks like the old and the new declaration shown together — confirm
* which one is current and drop the other.
*
* @prop {number} width width; supported only by html-template
* @prop {number} navigationTimeout navigation timeout meant for page.setDefaultNavigationTimeout(navigationTimeout);
*   NOTE(review): the visible htmlTemplateX code passes a fixed 120000 whenever this is set — confirm the intent
* @prop {string} querySelector CSS selector of the page element whose scroll height is measured;
*   document.body is used when it is unset or matches nothing
* @prop {string} waitUntil 'networkidle0' or 'networkidle2'; any other value calls page.goto without a waitUntil option
* @prop {number} preloadWait delay in milliseconds between scroll steps while preloading a lazy-loaded page
* @prop {number} preloadScroll vertical distance passed to window.scrollBy on each step
* @prop {number} preloadLoop number of scroll steps performed to preload a lazy-loaded page
* @prop {PDFOptions} pdfOption Puppeteer PDFOptions, merged over the built-in PDF defaults (any `path` is dropped)
*/
export interface htmlTemplateOption {
querySelector: string
pdfOption?: object
width?:number
navigationTimeout?:number
querySelector?: string
waitUntil?:string
preloadWait?:number
preloadScroll?:number
preloadLoop?:number
pdfOption?: PDFOptions
}