THotPDF Preflight Report Methods

Syntax

type
  THPDFPreflightFormat = (pfText, pfJSON, pfHTML);

function CreatePreflightReport(const SourceFile: TFileName): AnsiString;
function CreatePreflightReport(
  const SourceFile: TFileName;
  const Password  : AnsiString): AnsiString;
function CreatePreflightReport(
  const SourceFile: TFileName;
  const Password  : AnsiString;
  const Format    : THPDFPreflightFormat): AnsiString;
procedure SavePreflightReport(
  const SourceFile: TFileName;
  const ReportFile: TFileName);
procedure SavePreflightReport(
  const SourceFile: TFileName;
  const ReportFile: TFileName;
  const Password  : AnsiString);
procedure SavePreflightReport(
  const SourceFile: TFileName;
  const ReportFile: TFileName;
  const Password  : AnsiString;
  const Format    : THPDFPreflightFormat);
function ValidatePDFVT(
  const SourceFile: TFileName;
  out Report      : AnsiString): Boolean;
function ValidatePDFVT(
  const SourceFile: TFileName;
  const Password  : AnsiString;
  out Report      : AnsiString): Boolean;
procedure EmbedPreflightReportInPDF(
  const SourceFile: TFileName;
  const TargetFile: TFileName);
procedure EmbedPreflightReportInPDF(
  const SourceFile: TFileName;
  const TargetFile: TFileName;
  const Password  : AnsiString);
function LoadAndValidatePreflightReport(
  const EmbeddedFile  : TFileName;
  out OriginalReport: AnsiString;
  out CurrentReport : AnsiString): Boolean;
function LoadAndValidatePreflightReport(
  const EmbeddedFile  : TFileName;
  const Password      : AnsiString;
  out OriginalReport: AnsiString;
  out CurrentReport : AnsiString): Boolean;
function ComparePreflightReports(
  const ReportA: AnsiString;
  const ReportB: AnsiString): AnsiString;
function RepairPDFFromPreflightReport(
  const SourceFile: TFileName;
  const TargetFile: TFileName;
  out RepairsApplied: AnsiString): Boolean;
function RepairPDFFromPreflightReport(
  const SourceFile: TFileName;
  const TargetFile: TFileName;
  const Password  : AnsiString;
  out RepairsApplied: AnsiString): Boolean;
function AggregatePreflightReports(
  const Reports: array of AnsiString): AnsiString;
type
  THPDFPreflightProfile = record
    DisableChecks  : AnsiString;
    DisableWarnings: AnsiString;
    DisableHints   : Boolean;
  end;
function LoadPreflightProfile(
  const ProfileFile: TFileName): THPDFPreflightProfile;
function ApplyPreflightProfile(
  const Report : AnsiString;
  const Profile: THPDFPreflightProfile): AnsiString;
function ValidatePreflightProfile(
  const Profile: THPDFPreflightProfile;
  out UnknownNames: AnsiString): Boolean;
function GetBuiltInPreflightProfile(
  const Name: AnsiString): THPDFPreflightProfile;
procedure SavePreflightProfile(
  const Profile    : THPDFPreflightProfile;
  const ProfileFile: TFileName);
function CreatePreflightReportWithProfile(
  const SourceFile : TFileName;
  const Password   : AnsiString;
  const ProfileFile: TFileName;
  const Format     : THPDFPreflightFormat): AnsiString;
function MergePreflightProfiles(
  const A, B: THPDFPreflightProfile): THPDFPreflightProfile;
function DiffPreflightProfiles(
  const A, B  : THPDFPreflightProfile;
  out OnlyInA: AnsiString;
  out OnlyInB: AnsiString): Boolean;
procedure EmbedPreflightReportAsXMP(
  const SourceFile: TFileName;
  const TargetFile: TFileName);
procedure EmbedPreflightReportAsXMP(
  const SourceFile: TFileName;
  const TargetFile: TFileName;
  const Password  : AnsiString);
function ConvertPreflightReportToVeraPDFStyle(
  const Report: AnsiString): AnsiString;

Description

CreatePreflightReport reads an input PDF and returns a text report with lightweight structural checks. The password overloads use the existing password-aware load path for supported encrypted PDFs. SavePreflightReport writes the same report to a target text file.

ValidatePDFVT runs a focused PDF/VT validation pass and returns True only when the required markers and structures are present. The returned text report checks the pdfvtid:GTS_PDFVTVersion XMP claim, PDF/VT metadata namespace, PDF/VT modification date and xmp:ModifyDate match, PDF/X base marker, encryption absence, PDF/X output intent, catalog DPartRoot, DPartRootNode, NodeNameList, document part presence, loadable page count, and page-level DPart coverage. PDF/VT and XMP date properties may be written either as element text or as RDF attributes on an rdf:Description element.

The report checks the PDF header, EOF marker, whether the final %%EOF marker is near the end of the file, the last startxref offset, whether that offset targets an xref table or xref stream, %%EOF and startxref marker counts, revision marker balance and final marker ordering, trailer or XRef stream marker, loadable page count, encryption state, catalog, page tree, page object marker, MediaBox, information dictionary reference, root reference, indirect object count, indirect object definition count, highest object number, duplicate object number count, object number uniqueness, catalog object count, page tree object count, page object count, page tree declared page count, page tree count/loadable page consistency, page object/loadable page consistency, stream and endstream balance, stream /Length entry coverage, xref table count, xref subsection count, xref table entry counts, xref free and in-use entry counts, malformed xref row count, xref row validity, xref coverage against object numbers, xref stream count, object stream count, trailer count, trailer /Size, whether trailer /Size covers the highest object number, trailer /Root and /Info indirect references, whether those referenced objects are defined, whether trailer /Root targets a catalog object, trailer /ID array entry count and pair structure, trailer /Root, /Info, /ID, and /Encrypt presence, estimated incremental update count, linearization status, loaded page boxes, form field count, resource dictionary count, font resource count, embedded font program count (/FontFile, /FontFile2, /FontFile3), image XObject count, form XObject count, ColorSpace resource count, annotation count and widget/link annotation subcounts, filter chain usage counts for FlateDecode, DCTDecode, CCITTFaxDecode, JBIG2Decode, LZWDecode, ASCIIHexDecode, ASCII85Decode, RunLengthDecode, and JPXDecode, font embedding coverage, annotation count consistency, PDF/A / PDF/X / PDF/UA / Tagged PDF / transparency compliance hints, selected feature flags, JavaScript or action warnings (covering the full PDF 1.7 sec 12.6.4 action set: GoTo*, Thread, URI, Sound, Movie, Hide, Named, SubmitForm, ResetForm, ImportData, SetOCGState, Rendition, Trans, and GoTo3DView, plus the legacy JavaScript / Launch / OpenAction / AdditionalActions / EmbeddedFile / RichMedia checks and PDF/A mutual-exclusion warnings), warning totals, and an overall pass/fail summary. These helpers are intended as fast in-process checks before handing files to external standards validators; they are not a replacement for a full PDF/A, PDF/X, PDF/UA, or ISO 32000 preflight validation engine.

The Format overload accepts pfText (default plain-text path, byte-stable with the legacy overloads), pfJSON (CI-friendly JSON document), or pfHTML (dashboard-ready, self-contained HTML report with inline CSS and severity-coloured rows). The JSON output groups entries into input, size, pdfVersion, and xrefStyle top-level fields plus checks, info, hints, and warnings arrays and a summary object carrying failed, warnings, and result values. The built-in JSON encoder escapes ", \, and control bytes; UTF-8 byte sequences pass through unchanged.

Each checks, hints, and warnings entry carries an optional spec field that names the ISO clause the diagnostic targets (for example ISO 32000-1 sec 7.5.5 for trailer-related checks, ISO 19005-1 sec 6.6.2 for the PDF/A with JavaScript mutual-exclusion warning). The mapping covers every check, hint, and warning emitted by the report through v2.122.0. A formal JSON Schema (draft 2020-12) describing the pfJSON output structure is published at Docs/preflight-schema.json so downstream consumers can validate the output before parsing.

EmbedPreflightReportInPDF writes a copy of the source PDF to TargetFile with the legacy text-format preflight report appended after the last %%EOF as PDF-style comment lines (each line prefixed with %, the PDF comment character). PDF readers ignore the appended bytes because they fall outside the object graph, but archive workflows, text editors, and grep-style tools can still surface the embedded report for later audit. The original object graph, cross-reference table, and trailer are not modified.

LoadAndValidatePreflightReport extracts the report that EmbedPreflightReportInPDF previously appended, re-runs the current preflight algorithm against the source bytes preceding the embedded comment block, and compares the InputFingerprint values. The function returns True when both fingerprints match, indicating the embedded report still describes the file as it exists on disk. When validation fails, callers can inspect OriginalReport and CurrentReport to diff what changed.

ComparePreflightReports emits a unified-diff-like body for two reports. Shared lines appear with a two-space prefix, lines unique to ReportA are prefixed with "- ", and lines unique to ReportB are prefixed with "+ ". The algorithm is tuned for the well-formed key/value output produced by CreatePreflightReport where line order is deterministic; it is not a general-purpose LCS implementation but produces compact, readable diffs for the typical case of mostly-unchanged reports.

RepairPDFFromPreflightReport applies a conservative subset of byte-level repairs to a damaged PDF: it drops trailing bytes that follow the final %%EOF marker, and appends a missing %%EOF when one is not present anywhere. The function returns True when at least one repair was applied; RepairsApplied lists the repairs as one line per change. Object-graph repairs (rebuilding xref tables, patching stream lengths, fixing trailer dictionaries) are intentionally out of scope because such fixes can make a partially-recoverable file less recoverable; use a dedicated PDF recovery tool for deeper damage.

AggregatePreflightReports takes an array of per-file reports and produces a single batch summary. Each entry in the array is parsed for its Input, Size, and Summary fields; the aggregate emits one line per file plus totals for the number of passed, failed, and warning reports, total bytes scanned, and overall result counts. Useful for CI pipelines and shell loops that process a directory of PDFs and want a single concise summary at the end.

LoadPreflightProfile and ApplyPreflightProfile let callers tailor the report output without re-running the analysis. The profile file uses an INI-style format with three optional sections: [disable-checks] lists check names to suppress; [disable-warnings] lists warning names to suppress; and [options] accepts hints=false to suppress every Hint line. After filtering, the Summary line is recomputed so the suppressed entries no longer contribute to the Failed and Warnings totals; the rest of the report passes through unchanged.

ValidatePreflightProfile walks a loaded profile's DisableChecks and DisableWarnings lists and reports any name not recognised by the current preflight implementation. Useful to detect profile files authored against a newer or older HotPDF version that would otherwise silently disable nothing because the names do not match any emitted check or warning.

GetBuiltInPreflightProfile returns ready-to-use profile records for common workflows. Recognised names (case-insensitive): default / '' returns an empty profile; compact disables every Hint line for shorter reports; silent-actions disables every PDF 1.7 sec 12.6.4 action warning plus EmbeddedFile and RichMedia, intended for workflows that intentionally embed multimedia or interactive actions and do not want the preflight report to flag them. Unknown names also return an empty profile.

SavePreflightProfile writes a THPDFPreflightProfile record back to an INI file in the same format that LoadPreflightProfile consumes. The two routines are exact inverses for well-formed profiles, so callers can build a 'load preset, tweak, save' workflow: P := GetBuiltInPreflightProfile('compact'), edit P.DisableChecks, then SavePreflightProfile(P, 'tuned.ini') for later reuse.

CreatePreflightReportWithProfile is a one-stop convenience wrapper that composes the existing CreatePreflightReport, LoadPreflightProfile, ApplyPreflightProfile, and format converters so a caller that already has a source PDF, optional password, profile file, and target format can issue a single call instead of chaining four. Passing an empty ProfileFile skips the profile step.

MergePreflightProfiles returns the deduplicated union of two profiles: DisableChecks and DisableWarnings collect names from both inputs, DisableHints is the logical OR. Useful for layering a preset over project-specific tweaks.

DiffPreflightProfiles reports the structural difference between two profiles. Returns True when they are equivalent; otherwise False with OnlyInA and OnlyInB filled with newline-separated lines of the form check:<name>, warn:<name>, or option:hints=false describing which entries are exclusive to each side.

EmbedPreflightReportAsXMP is an archival-friendly variant of EmbedPreflightReportInPDF: it writes a copy of the source PDF with the preflight report appended as a PDF-style comment block whose payload is XMP / RDF (xmlns:hotpdf="http://www.loslab.com/hotpdf/preflight/"). Each report line becomes a hotpdf:<name> element with severity carried as an attribute on check rows. XMP-aware tools that scan a file for xpacket markers can surface the embedded report; PDF readers continue to ignore the appended bytes because they fall outside the object graph. This is intentionally not a spec-compliant XMP integration: the XMP payload is not referenced from the catalog /Metadata entry, so XMP readers that only follow the catalog reference will not see it.

ConvertPreflightReportToVeraPDFStyle shapes a HotPDF preflight report into a JSON document that follows the broad outline of veraPDF's validation output, with a top-level profile field, a jobs array containing itemDetails / taskResult / validationResult, and a ruleViolations array under validationResult.details. The output is HotPDF-styled rather than wire-compatible with veraPDF; the goal is to let downstream tooling that already consumes veraPDF JSON adapt to HotPDF output with minimal field-name remapping rather than re-learning a completely different data layout.

Example

var
  PDF: THotPDF;
  // Receive the embedded report and the freshly regenerated report from
  // LoadAndValidatePreflightReport (both are "out" parameters).
  OriginalReport, CurrentReport: AnsiString;
begin
  PDF := THotPDF.Create(nil);
  try
    // Plain-text report for an unencrypted input.
    PDF.SavePreflightReport('Input.pdf', 'PreflightReport.txt');
    // Plain-text report for an encrypted input, opened with its password.
    PDF.SavePreflightReport('Encrypted.pdf', 'EncryptedReport.txt', 'user-password');
    // Same input, rendered as JSON and as HTML (empty password).
    PDF.SavePreflightReport('Input.pdf', 'PreflightReport.json', '', pfJSON);
    PDF.SavePreflightReport('Input.pdf', 'PreflightReport.html', '', pfHTML);
    // Write a copy of the PDF with the text report appended after %%EOF.
    PDF.EmbedPreflightReportInPDF('Input.pdf', 'Input-with-report.pdf');
    // Re-check the embedded report against the current file contents;
    // on False, OriginalReport and CurrentReport can be diffed by the caller.
    if not PDF.LoadAndValidatePreflightReport(
      'Input-with-report.pdf', OriginalReport, CurrentReport)
    then
      WriteLn('Embedded preflight report no longer matches the source.');
  finally
    PDF.Free;
  end;
end;

See Also