|
THotPDF Preflight Report Methods
Syntax
type
THPDFPreflightFormat = (pfText, pfJSON, pfHTML);
function CreatePreflightReport(const SourceFile: TFileName): AnsiString;
function CreatePreflightReport(
const SourceFile: TFileName;
const Password : AnsiString): AnsiString;
function CreatePreflightReport(
const SourceFile: TFileName;
const Password : AnsiString;
const Format : THPDFPreflightFormat): AnsiString;
procedure SavePreflightReport(
const SourceFile: TFileName;
const ReportFile: TFileName);
procedure SavePreflightReport(
const SourceFile: TFileName;
const ReportFile: TFileName;
const Password : AnsiString);
procedure SavePreflightReport(
const SourceFile: TFileName;
const ReportFile: TFileName;
const Password : AnsiString;
const Format : THPDFPreflightFormat);
function ValidatePDFVT(
const SourceFile: TFileName;
out Report : AnsiString): Boolean;
function ValidatePDFVT(
const SourceFile: TFileName;
const Password : AnsiString;
out Report : AnsiString): Boolean;
procedure EmbedPreflightReportInPDF(
const SourceFile: TFileName;
const TargetFile: TFileName);
procedure EmbedPreflightReportInPDF(
const SourceFile: TFileName;
const TargetFile: TFileName;
const Password : AnsiString);
function LoadAndValidatePreflightReport(
const EmbeddedFile : TFileName;
out OriginalReport: AnsiString;
out CurrentReport : AnsiString): Boolean;
function LoadAndValidatePreflightReport(
const EmbeddedFile : TFileName;
const Password : AnsiString;
out OriginalReport: AnsiString;
out CurrentReport : AnsiString): Boolean;
function ComparePreflightReports(
const ReportA: AnsiString;
const ReportB: AnsiString): AnsiString;
function RepairPDFFromPreflightReport(
const SourceFile: TFileName;
const TargetFile: TFileName;
out RepairsApplied: AnsiString): Boolean;
function RepairPDFFromPreflightReport(
const SourceFile: TFileName;
const TargetFile: TFileName;
const Password : AnsiString;
out RepairsApplied: AnsiString): Boolean;
function AggregatePreflightReports(
const Reports: array of AnsiString): AnsiString;
type
THPDFPreflightProfile = record
DisableChecks : AnsiString;
DisableWarnings: AnsiString;
DisableHints : Boolean;
end;
function LoadPreflightProfile(
const ProfileFile: TFileName): THPDFPreflightProfile;
function ApplyPreflightProfile(
const Report : AnsiString;
const Profile: THPDFPreflightProfile): AnsiString;
function ValidatePreflightProfile(
const Profile: THPDFPreflightProfile;
out UnknownNames: AnsiString): Boolean;
function GetBuiltInPreflightProfile(
const Name: AnsiString): THPDFPreflightProfile;
procedure SavePreflightProfile(
const Profile : THPDFPreflightProfile;
const ProfileFile: TFileName);
function CreatePreflightReportWithProfile(
const SourceFile : TFileName;
const Password : AnsiString;
const ProfileFile: TFileName;
const Format : THPDFPreflightFormat): AnsiString;
function MergePreflightProfiles(
const A, B: THPDFPreflightProfile): THPDFPreflightProfile;
function DiffPreflightProfiles(
const A, B : THPDFPreflightProfile;
out OnlyInA: AnsiString;
out OnlyInB: AnsiString): Boolean;
procedure EmbedPreflightReportAsXMP(
const SourceFile: TFileName;
const TargetFile: TFileName);
procedure EmbedPreflightReportAsXMP(
const SourceFile: TFileName;
const TargetFile: TFileName;
const Password : AnsiString);
function ConvertPreflightReportToVeraPDFStyle(
const Report: AnsiString): AnsiString;
Description
CreatePreflightReport reads an input PDF and returns a
text report with lightweight structural checks. The password overloads
use the existing password-aware load path for supported encrypted PDFs.
SavePreflightReport writes the same report to the file named by
ReportFile, in the requested Format when that overload is used.
ValidatePDFVT runs a focused PDF/VT validation pass and
returns True only when the required markers and structures
are present. The returned text report checks the
pdfvtid:GTS_PDFVTVersion XMP claim, PDF/VT metadata
namespace, PDF/VT modification date and xmp:ModifyDate
match, PDF/X base marker, encryption absence, PDF/X output intent,
catalog DPartRoot, DPartRootNode,
NodeNameList, document part presence, loadable page count,
and page-level DPart coverage. PDF/VT and XMP date
properties may be written either as element text or as RDF attributes
on an rdf:Description element.
The CreatePreflightReport report checks the PDF header, EOF marker, whether the final
%%EOF marker is near the end of the file, the last
startxref offset, whether that offset targets an xref table
or xref stream, %%EOF and startxref marker
counts, revision marker balance and final marker ordering, trailer or
XRef stream marker, loadable page count,
encryption state, catalog, page tree, page object marker,
MediaBox, information dictionary reference, root reference,
indirect object count, indirect object definition count, highest object
number, duplicate object number count, object number uniqueness, catalog
object count, page tree object count, page object count, page tree
declared page count, page tree count/loadable page consistency, page
object/loadable page consistency, stream and endstream
balance, stream /Length entry coverage, xref table count,
xref subsection count,
xref table entry counts, xref free and in-use entry counts, malformed
xref row count, xref row validity, xref coverage against object numbers,
xref stream count, object stream count, trailer count, trailer
/Size, whether trailer /Size covers the
highest object number, trailer /Root and /Info
indirect references, whether those referenced objects are defined,
whether trailer /Root targets a catalog object,
trailer /ID array entry count and pair structure, trailer
/Root, /Info, /ID, and
/Encrypt presence,
estimated incremental update count, linearization status, loaded page
boxes, form field count, resource dictionary count, font resource count,
embedded font program count (/FontFile,
/FontFile2, /FontFile3), image XObject count,
form XObject count, ColorSpace resource count, annotation count and
widget/link annotation subcounts, filter chain usage counts for
FlateDecode, DCTDecode, CCITTFaxDecode,
JBIG2Decode, LZWDecode, ASCIIHexDecode,
ASCII85Decode, RunLengthDecode, and
JPXDecode, font embedding coverage, annotation count
consistency, PDF/A / PDF/X / PDF/UA / Tagged PDF / transparency
compliance hints, selected feature flags, JavaScript or action
warnings (covering the full PDF 1.7 sec 12.6.4 action set: GoTo*,
Thread, URI, Sound, Movie, Hide, Named, SubmitForm, ResetForm,
ImportData, SetOCGState, Rendition, Trans, and GoTo3DView, plus the
legacy JavaScript / Launch / OpenAction / AdditionalActions /
EmbeddedFile / RichMedia checks and PDF/A mutual-exclusion warnings),
warning totals, and an overall pass/fail summary.
These helpers are intended as fast in-process checks before handing files
to external standards validators; they are not a replacement for a full
PDF/A, PDF/X, PDF/UA, or ISO 32000 preflight validation engine.
The Format overload accepts pfText (default
plain-text path, byte-stable with the legacy overloads),
pfJSON (CI-friendly JSON document), or
pfHTML (dashboard-ready, self-contained HTML report
with inline CSS and severity-colored rows). The JSON output groups
entries into input, size, pdfVersion,
and xrefStyle top-level fields plus checks,
info, hints, and warnings arrays
and a summary object carrying failed,
warnings, and result values. The built-in JSON
encoder escapes ", \, and control bytes;
UTF-8 byte sequences pass through unchanged.
Each checks, hints, and warnings
entry carries an optional spec field that names the ISO
clause the diagnostic targets (for example
ISO 32000-1 sec 7.5.5 for trailer-related checks,
ISO 19005-1 sec 6.6.2 for the PDF/A with JavaScript
mutual-exclusion warning). The mapping covers every check, hint, and
warning emitted by the report through v2.122.0. A formal JSON Schema
(draft 2020-12) describing the pfJSON output structure
is published at Docs/preflight-schema.json so
downstream consumers can validate the output before parsing.
EmbedPreflightReportInPDF writes a copy of the source
PDF to TargetFile with the legacy text-format preflight
report appended after the last %%EOF as PDF-style
comment lines (each line prefixed with % ). PDF
readers ignore the appended bytes because they fall outside the
object graph, but archive workflows, text editors, and
grep-style tools can still surface the embedded report
for later audit. The original object graph, cross-reference table,
and trailer are not modified.
LoadAndValidatePreflightReport extracts the report
that EmbedPreflightReportInPDF previously appended,
re-runs the current preflight algorithm against the source bytes
preceding the embedded comment block, and compares the
InputFingerprint values. The function returns
True when both fingerprints match, indicating the
embedded report still describes the file as it exists on disk.
When validation fails, callers can inspect
OriginalReport and CurrentReport to
diff what changed.
ComparePreflightReports emits a unified-diff-like
body for two reports. Shared lines appear with a two-space prefix,
lines unique to ReportA are prefixed with - ,
and lines unique to ReportB are prefixed with
+ . The algorithm is tuned for the well-formed
key/value output produced by CreatePreflightReport
where line order is deterministic; it is not a general-purpose
LCS implementation but produces compact, readable diffs for the
typical case of mostly-unchanged reports.
RepairPDFFromPreflightReport applies a conservative
subset of byte-level repairs to a damaged PDF: it drops trailing
bytes that follow the final %%EOF marker, and appends
a missing %%EOF when one is not present anywhere. The
function returns True when at least one repair was
applied; RepairsApplied lists the repairs as one line
per change. Object-graph repairs (rebuilding xref tables, patching
stream lengths, fixing trailer dictionaries) are intentionally out
of scope because such fixes can make a partially-recoverable file
less recoverable; use a dedicated PDF recovery tool for deeper
damage.
AggregatePreflightReports takes an array of
per-file reports and produces a single batch summary. Each entry
in the array is parsed for its Input,
Size, and Summary fields; the aggregate
emits one line per file plus totals for the number of passed,
failed, and warning reports, total bytes scanned, and overall
result counts. This is useful for CI pipelines and shell loops that
process a directory of PDFs and want a single concise summary at
the end.
LoadPreflightProfile and
ApplyPreflightProfile let callers tailor the report
output without re-running the analysis. The profile file uses an
INI-style format with three optional sections:
[disable-checks] lists check names to suppress;
[disable-warnings] lists warning names to suppress;
and [options] accepts hints=false to
suppress every Hint line. After filtering, the
Summary line is recomputed so the suppressed entries no longer
contribute to the Failed and Warnings totals; the rest of the
report passes through unchanged.
ValidatePreflightProfile walks a loaded profile's
DisableChecks and DisableWarnings lists
and reports any name not recognized by the current preflight
implementation. This helps detect profile files authored against a
newer or older HotPDF version that would otherwise silently
disable nothing because the names do not match any emitted check
or warning.
GetBuiltInPreflightProfile returns ready-to-use
profile records for common workflows. Recognized names
(case-insensitive): default / ''
returns an empty profile; compact disables every
Hint line for shorter reports;
silent-actions disables every PDF 1.7 sec 12.6.4
action warning plus EmbeddedFile and
RichMedia, intended for workflows that
intentionally embed multimedia or interactive actions and do
not want the preflight report to flag them. Unknown names also
return an empty profile.
SavePreflightProfile writes a
THPDFPreflightProfile record back to an INI file in
the same format that LoadPreflightProfile consumes.
The two routines are exact inverses for well-formed profiles,
so callers can build a 'load preset, tweak, save' workflow:
P := GetBuiltInPreflightProfile('compact'), edit
P.DisableChecks, then
SavePreflightProfile(P, 'tuned.ini') for later
reuse.
CreatePreflightReportWithProfile is a one-stop
convenience wrapper that composes the existing
CreatePreflightReport,
LoadPreflightProfile,
ApplyPreflightProfile, and format converters so a
caller that already has a source PDF, optional password,
profile file, and target format can issue a single call instead
of chaining four. Passing an empty ProfileFile
skips the profile step.
MergePreflightProfiles returns the deduplicated
union of two profiles: DisableChecks and
DisableWarnings collect names from both inputs,
and DisableHints is the logical OR of the two flags. This is useful
for layering a preset over project-specific tweaks.
DiffPreflightProfiles reports the structural
difference between two profiles. It returns True when
they are equivalent; otherwise it returns False with
OnlyInA and OnlyInB filled with
newline-separated lines of the form check:<name>,
warn:<name>, or option:hints=false
describing which entries are exclusive to each side.
EmbedPreflightReportAsXMP is an archival-friendly
variant of EmbedPreflightReportInPDF: it writes a
copy of the source PDF with the preflight report appended as a
PDF-style comment block whose payload is XMP / RDF
(xmlns:hotpdf="http://www.loslab.com/hotpdf/preflight/").
Each report line becomes a hotpdf:<name>
element with severity carried as an attribute on check rows.
XMP-aware tools that scan a file for xpacket
markers can surface the embedded report; PDF readers continue
to ignore the appended bytes because they fall outside the
object graph. This is intentionally not a spec-compliant XMP
integration: the XMP payload is not referenced from the catalog
/Metadata entry, so XMP readers that only follow
the catalog reference will not see it.
ConvertPreflightReportToVeraPDFStyle shapes a
HotPDF preflight report into a JSON document that follows the
broad outline of veraPDF's validation output, with a top-level
profile field, a jobs array containing
itemDetails / taskResult /
validationResult, and a ruleViolations
array under validationResult.details. The output is HotPDF-styled
rather than wire-compatible with veraPDF; the goal is to let
downstream tooling that already consumes veraPDF JSON adapt to
HotPDF output with minimal field-name remapping rather than
re-learning a completely different data layout.
Example
var
PDF: THotPDF;
OriginalReport, CurrentReport: AnsiString;
begin
PDF := THotPDF.Create(nil);
try
PDF.SavePreflightReport('Input.pdf', 'PreflightReport.txt');
PDF.SavePreflightReport('Encrypted.pdf', 'EncryptedReport.txt', 'user-password');
PDF.SavePreflightReport('Input.pdf', 'PreflightReport.json', '', pfJSON);
PDF.SavePreflightReport('Input.pdf', 'PreflightReport.html', '', pfHTML);
PDF.EmbedPreflightReportInPDF('Input.pdf', 'Input-with-report.pdf');
if not PDF.LoadAndValidatePreflightReport(
'Input-with-report.pdf', OriginalReport, CurrentReport)
then
WriteLn('Embedded preflight report no longer matches the source.');
finally
PDF.Free;
end;
end;
See Also
|