Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/ImageSharp/Formats/Png/PngConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,21 @@ internal static class PngConstants
/// </summary>
public const int MinTextKeywordLength = 1;

/// <summary>
/// The text chunk keyword that marks a legacy, text-encoded Exif profile.
/// The decoder compares chunk keywords against this value case-insensitively.
/// </summary>
public const string ExifRawProfileKeyword = "Raw profile type exif";

/// <summary>
/// The text chunk keyword that marks a legacy, text-encoded IPTC profile.
/// The decoder compares chunk keywords against this value case-insensitively.
/// </summary>
public const string IptcRawProfileKeyword = "Raw profile type iptc";

/// <summary>
/// The resource id that identifies the IPTC resource inside a Photoshop Image Resource Block (IRB).
/// Within the block header the id occupies two bytes in big endian order (0x04, 0x04).
/// </summary>
public const ushort AdobeIptcResourceId = 0x0404;

/// <summary>
/// Gets the header bytes identifying a Png.
/// </summary>
Expand Down Expand Up @@ -100,4 +115,31 @@ internal static class PngConstants
(byte)'m',
(byte)'p'
];

/// <summary>
/// Gets the "Photoshop 3.0" identifier as ASCII bytes, including its trailing null terminator
/// (14 bytes in total). Some PNG metadata payloads begin with this marker.
/// </summary>
public static ReadOnlySpan<byte> AdobePhotoshop30 => "Photoshop 3.0\0"u8;

/// <summary>
/// Gets the four ASCII bytes of the "8BIM" signature that opens each Photoshop resource block.
/// </summary>
public static ReadOnlySpan<byte> EightBim => "8BIM"u8;
}
218 changes: 216 additions & 2 deletions src/ImageSharp/Formats/Png/PngDecoderCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
using SixLabors.ImageSharp.Metadata.Profiles.Cicp;
using SixLabors.ImageSharp.Metadata.Profiles.Exif;
using SixLabors.ImageSharp.Metadata.Profiles.Icc;
using SixLabors.ImageSharp.Metadata.Profiles.Iptc;
using SixLabors.ImageSharp.Metadata.Profiles.Xmp;
using SixLabors.ImageSharp.PixelFormats;

Expand Down Expand Up @@ -1440,14 +1441,19 @@ private void ReadCompressedTextChunk(ImageMetadata baseMetadata, PngMetadata met
/// object unmodified.</returns>
private static bool TryReadTextChunkMetadata(ImageMetadata baseMetadata, string chunkName, string chunkText)
{
if (chunkName.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase) &&
if (chunkName.Equals(PngConstants.ExifRawProfileKeyword, StringComparison.OrdinalIgnoreCase) &&
TryReadLegacyExifTextChunk(baseMetadata, chunkText))
{
// Successfully parsed legacy exif data from text
return true;
}

// TODO: "Raw profile type iptc", potentially others?
if (chunkName.Equals(PngConstants.IptcRawProfileKeyword, StringComparison.OrdinalIgnoreCase) &&
TryReadLegacyIptcTextChunk(baseMetadata, chunkText))
{
// Successfully parsed legacy iptc data from text
return true;
}

// No special chunk data identified
return false;
Expand Down Expand Up @@ -1571,6 +1577,214 @@ private static bool TryReadLegacyExifTextChunk(ImageMetadata metadata, string da
return true;
}

/// <summary>
/// Reads iptc data encoded into a text chunk with the name "Raw profile type iptc".
/// This convention is used by ImageMagick/exiftool/exiv2/digiKam and stores a byte-count
/// followed by hex-encoded bytes.
/// </summary>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the decoded iptc tags into.</param>
/// <param name="data">The contents of the "Raw profile type iptc" text chunk.</param>
/// <returns>
/// <see langword="true"/> if the chunk was consumed, either because an IPTC profile was decoded from it
/// or because <paramref name="metadata"/> already carries one; otherwise, <see langword="false"/>.
/// </returns>
private static bool TryReadLegacyIptcTextChunk(ImageMetadata metadata, string data)
{
    // Preserve first IPTC found.
    if (metadata.IptcProfile is not null)
    {
        return true;
    }

    ReadOnlySpan<char> dataSpan = data.AsSpan().TrimStart();

    // Must start with the "iptc" identifier (case-insensitive).
    // Common real-world format (ImageMagick/ExifTool) is:
    // "IPTC profile\n <len>\n<hex...>"
    if (dataSpan.Length < 4 || !StringEqualsInsensitive(dataSpan[..4], "iptc".AsSpan()))
    {
        return false;
    }

    // Skip the remainder of the first line ("IPTC profile", etc).
    int firstLineEnd = dataSpan.IndexOf('\n');
    if (firstLineEnd < 0)
    {
        return false;
    }

    dataSpan = dataSpan[(firstLineEnd + 1)..].TrimStart();

    // Next line contains the decimal byte length (often indented).
    int dataLengthEnd = dataSpan.IndexOf('\n');
    if (dataLengthEnd < 0)
    {
        return false;
    }

    int dataLength;
    try
    {
        dataLength = ParseInt32(dataSpan[..dataLengthEnd]);
    }
    catch
    {
        // A malformed length means this is not a profile we understand.
        return false;
    }

    if (dataLength <= 0)
    {
        return false;
    }

    // Skip to the hex-encoded data.
    dataSpan = dataSpan[(dataLengthEnd + 1)..].Trim();

    // The declared length comes straight from the (untrusted) file. Every decoded byte needs
    // two hex characters, so a length the remaining text cannot possibly satisfy is rejected
    // up front instead of being used to size an arbitrarily large buffer.
    if (dataLength > dataSpan.Length / 2)
    {
        return false;
    }

    byte[] iptcBlob = new byte[dataLength];

    try
    {
        int written = 0;

        while (written < dataLength)
        {
            int newlineIndex = dataSpan.IndexOf('\n');
            ReadOnlySpan<char> lineSpan = newlineIndex == -1 ? dataSpan : dataSpan[..newlineIndex];

            // Important: handle CRLF and any incidental whitespace.
            lineSpan = lineSpan.Trim(); // removes ' ', '\t', '\r', '\n', etc.

            if (!lineSpan.IsEmpty)
            {
                written += HexConverter.HexStringToBytes(lineSpan, iptcBlob.AsSpan()[written..]);
            }

            if (newlineIndex == -1)
            {
                // That was the last line.
                break;
            }

            dataSpan = dataSpan[(newlineIndex + 1)..];
        }

        // The hex payload must supply exactly the number of bytes that was declared.
        if (written != dataLength)
        {
            return false;
        }
    }
    catch
    {
        // Invalid hex input: treat the chunk as an ordinary text chunk.
        return false;
    }

    // Prefer IRB extraction if this is Photoshop-style data (8BIM resource blocks).
    byte[] iptcPayload = TryExtractIptcFromPhotoshopIrb(iptcBlob, out byte[] extracted)
        ? extracted
        : iptcBlob;

    metadata.IptcProfile = new IptcProfile(iptcPayload);
    return true;
}

/// <summary>
/// Searches a buffer of Photoshop Image Resource Blocks (IRB) for the IPTC resource and copies its payload out.
/// </summary>
/// <remarks>
/// An optional leading "Photoshop 3.0\0" header is skipped. Each resource block is then expected to consist of
/// the "8BIM" signature, a 2-byte big endian resource id, a Pascal string name padded to an even length,
/// a 4-byte big endian data size and the data itself, padded to an even length. The first block whose id
/// equals <see cref="PngConstants.AdobeIptcResourceId"/> wins. The IPTC payload itself is not validated.
/// </remarks>
/// <param name="data">The bytes to scan for Photoshop resource blocks.</param>
/// <param name="iptcBytes">
/// When this method returns <see langword="true"/>, a copy of the IPTC resource data; otherwise, an undefined value.
/// </param>
/// <returns>
/// <see langword="true"/> if an IPTC resource was found; <see langword="false"/> if the data is malformed,
/// truncated, or contains no IPTC resource.
/// </returns>
private static bool TryExtractIptcFromPhotoshopIrb(ReadOnlySpan<byte> data, out byte[] iptcBytes)
{
    iptcBytes = default!;

    // Some writers include the "Photoshop 3.0\0" header, some store just IRB blocks.
    ReadOnlySpan<byte> header = PngConstants.AdobePhotoshop30;
    ReadOnlySpan<byte> remaining = data.StartsWith(header) ? data[header.Length..] : data;

    // 12 bytes is the smallest possible block: signature (4) + id (2) + empty padded name (2) + size (4).
    while (remaining.Length >= 12)
    {
        if (!remaining.StartsWith(PngConstants.EightBim))
        {
            return false;
        }

        // Bytes 4-5: resource id, big endian.
        ushort id = (ushort)((remaining[4] << 8) | remaining[5]);

        // Byte 6: length prefix of the Pascal string name; the whole name field is padded to even.
        int nameFieldLength = 1 + remaining[6];
        nameFieldLength += nameFieldLength & 1;

        remaining = remaining[6..];
        if (remaining.Length < nameFieldLength + 4)
        {
            return false;
        }

        remaining = remaining[nameFieldLength..];

        // Resource data size, 4 bytes, big endian. A set top bit yields a negative value, rejected below.
        int size = (remaining[0] << 24) | (remaining[1] << 16) | (remaining[2] << 8) | remaining[3];
        remaining = remaining[4..];

        if (size < 0 || size > remaining.Length)
        {
            return false;
        }

        if (id == PngConstants.AdobeIptcResourceId)
        {
            iptcBytes = remaining[..size].ToArray();
            return true;
        }

        // Not the resource we want: step over its data, which is padded to even.
        int paddedSize = size + (size & 1);
        if (paddedSize > remaining.Length)
        {
            return false;
        }

        remaining = remaining[paddedSize..];
    }

    return false;
}

/// <summary>
/// Reads the color profile chunk. The data is stored similar to the zTXt chunk.
/// </summary>
Expand Down
Loading
Loading