#------------------------------------------------------------------------------ # File: iWork.pm # # Description: Read Apple iWork '09 XML+ZIP files # # Revisions: 2009/11/11 - P. Harvey Created #------------------------------------------------------------------------------ package Image::ExifTool::iWork; use strict; use vars qw($VERSION); use Image::ExifTool qw(:DataAccess :Utils); use Image::ExifTool::XMP; use Image::ExifTool::ZIP; $VERSION = '1.04'; # test for recognized iWork document extensions and outer XML elements my %iWorkType = ( # file extensions NUMBERS => 'NUMBERS', PAGES => 'PAGES', KEY => 'KEY', KTH => 'KTH', NMBTEMPLATE => 'NMBTEMPLATE', # we don't support double extensions -- # "PAGES.TEMPLATE" => 'Apple Pages Template', # outer XML elements 'ls:document' => 'NUMBERS', 'sl:document' => 'PAGES', 'key:presentation' => 'KEY', ); # MIME types for iWork files (Apple has not registered these yet, but these # are my best guess after doing some googling. I'm not 100% sure what "sff" # indicates, but I think it refers to the new "flattened" package format) my %mimeType = ( 'NUMBERS' => 'application/x-iwork-numbers-sffnumbers', 'PAGES' => 'application/x-iwork-pages-sffpages', 'KEY' => 'application/x-iWork-keynote-sffkey', 'NMBTEMPLATE' => 'application/x-iwork-numbers-sfftemplate', 'PAGES.TEMPLATE'=> 'application/x-iwork-pages-sfftemplate', 'KTH' => 'application/x-iWork-keynote-sffkth', ); # iWork tags %Image::ExifTool::iWork::Main = ( GROUPS => { 0 => 'XML', 1 => 'XML', 2 => 'Document' }, PROCESS_PROC => \&Image::ExifTool::XMP::ProcessXMP, VARS => { NO_ID => 1 }, NOTES => q{ The Apple iWork '09 file format is a ZIP archive containing XML files similar to the Office Open XML (OOXML) format. Metadata tags in iWork files are extracted even if they don't appear below. }, authors => { Name => 'Author', Groups => { 2 => 'Author' } }, comment => { }, copyright => { Groups => { 2 => 'Author' } }, keywords => { }, projects => { List => 1 }, title => { }, ); #------------------------------------------------------------------------------ # Generate a tag ID for this XML tag # Inputs: 0) tag property name list ref # Returns: tagID sub GetTagID($) { my $props = shift; return 0 if $$props[-1] =~ /^\w+:ID$/; # ignore ID tags return ($$props[0] =~ /^.*?:(.*)/) ? $1 : $$props[0]; } #------------------------------------------------------------------------------ # We found an XMP property name/value # Inputs: 0) ExifTool object ref, 1) tag table ref # 2) reference to array of XMP property names (last is current property) # 3) property value, 4) attribute hash ref (not used here) # Returns: 1 if valid tag was found sub FoundTag($$$$;$) { my ($et, $tagTablePtr, $props, $val, $attrs) = @_; return 0 unless @$props; my $verbose = $et->Options('Verbose'); $et->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1; # un-escape XML character entities $val = Image::ExifTool::XMP::UnescapeXML($val); # convert from UTF8 to ExifTool Charset $val = $et->Decode($val, 'UTF8'); my $tag = GetTagID($props) or return 0; # add any unknown tags to table unless ($$tagTablePtr{$tag}) { $et->VPrint(0, " [adding $tag]\n") if $verbose; AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag }); } # save the tag $et->HandleTag($tagTablePtr, $tag, $val); return 1; } #------------------------------------------------------------------------------ # Extract information from an iWork file # Inputs: 0) ExifTool object reference, 1) dirInfo reference # Returns: 1 # Notes: Upon entry to this routine, the file type has already been verified # as ZIP and the dirInfo hash contains a 'ZIP' Archive::Zip object reference sub Process_iWork($$) { my ($et, $dirInfo) = @_; my $zip = $$dirInfo{ZIP}; my ($type, $index, $indexFile, $status); # try to determine the file type local $SIG{'__WARN__'} = \&Image::ExifTool::ZIP::WarnProc; # trust type given by file extension if available $type = $iWorkType{$$et{FILE_EXT}} if $$et{FILE_EXT}; unless ($type) { # read the index file my @members = $zip->membersMatching('^index\.(xml|apxl)$'); if (@members) { ($index, $status) = $zip->contents($members[0]); unless ($status) { $indexFile = $members[0]->fileName(); if ($index =~ /^\s*<\?xml version=[^<]+<(\w+:\w+)/s) { $type = $iWorkType{$1} if $iWorkType{$1}; } } } $type or $type = 'ZIP'; # assume ZIP by default } $et->SetFileType($type, $mimeType{$type}); my @members = $zip->members(); my $docNum = 0; my $member; foreach $member (@members) { # get filename of this ZIP member my $file = $member->fileName(); next unless defined $file; $et->VPrint(0, "File: $file\n"); # set the document number and extract ZIP tags $$et{DOC_NUM} = ++$docNum; Image::ExifTool::ZIP::HandleMember($et, $member); # process only the index XML and JPEG thumbnail files next unless $file =~ m{^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg)$}i; # get the file contents if necessary # (CAREFUL! $buff MUST be local since we hand off a value ref to PreviewImage) my ($buff, $buffPt); if ($indexFile and $indexFile eq $file) { # use the index file we already loaded $buffPt = \$index; } else { ($buff, $status) = $zip->contents($member); $status and $et->Warn("Error extracting $file"), next; $buffPt = \$buff; } # extract JPEG as PreviewImage (should only be QuickLook/Thumbnail.jpg) if ($file =~ /\.jpg$/) { $et->FoundTag('PreviewImage', $buffPt); next; } # process "metadata" section of XML index file next unless $$buffPt =~ /<(\w+):metadata>/g; my $ns = $1; my $p1 = pos $$buffPt; next unless $$buffPt =~ m{}g; # construct XML data from "metadata" section only $$buffPt = '' . substr($$buffPt, $p1, pos($$buffPt)-$p1); my %dirInfo = ( DataPt => $buffPt, DirLen => length $$buffPt, DataLen => length $$buffPt, XMPParseOpts => { FoundProc => \&FoundTag, }, ); my $tagTablePtr = GetTagTable('Image::ExifTool::iWork::Main'); $et->ProcessDirectory(\%dirInfo, $tagTablePtr); undef $$buffPt; # (free memory now) } delete $$et{DOC_NUM}; return 1; } 1; # end __END__ =head1 NAME Image::ExifTool::iWork - Read Apple iWork '09 XML+ZIP files =head1 SYNOPSIS This module is used by Image::ExifTool =head1 DESCRIPTION This module contains definitions required by Image::ExifTool to extract meta information from Apple iWork '09 XML+ZIP files. =head1 AUTHOR Copyright 2003-2016, Phil Harvey (phil at owl.phy.queensu.ca) This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO L, L, L =cut