Bug 331700

Summary: Some metadata are written with corrupted encoding of non-latin characters
Product: [Applications] digikam Reporter: Michal Thoma <michal>
Component: Metadata-Sidecar Assignee: Digikam Developers <digikam-bugs-null>
Status: RESOLVED FIXED    
Severity: normal CC: caulier.gilles
Priority: NOR    
Version: 3.5.0   
Target Milestone: ---   
Platform: Ubuntu   
OS: Linux   
Latest Commit: Version Fixed In: 4.0.0
Sentry Crash Report:
Attachments: Not reproducible with digiKam 4.0.0-beta3

Description Michal Thoma 2014-03-03 12:47:29 UTC
I use XMP sidecar metadata for storage and found, during copying, that some of this metadata fails to support UTF-8. Non-Latin characters are changed into question marks.

The tag affected is dc:description.

exif:UserComment and tiff:ImageDescription are written correctly. The affected characters are from the Czech alphabet, such as ěščřžů etc.

Find the XMP sidecar below. It contains the description "Hotel Paříž", which is rendered in dc:description as "Hotel Pa?í?".

This affects interoperability with other software, because the dc:description tag is considered the primary description field in some apps - especially exiftool.

Reproducible: Always

Steps to Reproduce:
1. Write a description containing non-Latin characters.
2. In the right panel, look at the XMP metadata and search for the Dublin Core Description tag.
3. The non-Latin characters are written incorrectly.



<?xml version="1.0" encoding="UTF-8"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 4.4.0-Exiv2">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:Description rdf:about=""
    xmlns:xmp="http://ns.adobe.com/xap/1.0/"
    xmlns:MicrosoftPhoto="http://ns.microsoft.com/photo/1.0/"
    xmlns:exif="http://ns.adobe.com/exif/1.0/"
    xmlns:digiKam="http://www.digikam.org/ns/1.0/"
    xmlns:tiff="http://ns.adobe.com/tiff/1.0/"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmp:CreatorTool="digiKam-3.4.0"
   xmp:Rating="3"
   MicrosoftPhoto:Rating="50"
   exif:DateTimeOriginal="2014-03-01T17:14:53"
   exif:DateTimeDigitized="2014-03-01T17:14:53"
   exif:ExposureTime="1/200"
   exif:FNumber="90/10"
   exif:ExposureProgram="3"
   exif:ExposureBiasValue="0/10"
   exif:MeteringMode="2"
   exif:FocalLength="5500/100"
   exif:SensingMethod="2"
   exif:CFAPattern="0 2 0 2 2 1 1 0"
   exif:CustomRendered="0"
   exif:ExposureMode="0"
   exif:WhiteBalance="1"
   exif:FocalLengthIn35mmFilm="82"
   exif:SceneCaptureType="0"
   exif:Contrast="2"
   exif:Saturation="0"
   exif:Sharpness="2"
   exif:SubjectDistanceRange="3"
   exif:GPSVersionID="2.3.0.0"
   tiff:ImageWidth="4992"
   tiff:ImageLength="3284"
   tiff:Compression="65535"
   tiff:PhotometricInterpretation="32803"
   tiff:Orientation="8"
   tiff:SamplesPerPixel="1"
   tiff:PlanarConfiguration="1"
   tiff:XResolution="300/1"
   tiff:YResolution="300/1"
   tiff:ResolutionUnit="2"
   tiff:DateTime="2014-03-01T17:14:53"
   tiff:Make="PENTAX             "
   tiff:Model="PENTAX K-5         "
   tiff:Software="K-5 Ver 1.15           ">
   <exif:Flash
    exif:Fired="False"
    exif:Return="0"
    exif:Mode="2"
    exif:Function="False"
    exif:RedEyeMode="False"/>
   <exif:UserComment>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">Hotel Paříž</rdf:li>
    </rdf:Alt>
   </exif:UserComment>
   <exif:ISOSpeedRatings>
    <rdf:Seq>
     <rdf:li>400</rdf:li>
    </rdf:Seq>
   </exif:ISOSpeedRatings>
   <digiKam:CaptionsAuthorNames>
    <rdf:Alt>
     <rdf:li xml:lang="x-default"/>
    </rdf:Alt>
   </digiKam:CaptionsAuthorNames>
   <digiKam:CaptionsDateTimeStamps>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">2014-03-01T20:47:10</rdf:li>
    </rdf:Alt>
   </digiKam:CaptionsDateTimeStamps>
   <tiff:ImageDescription>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">Hotel Paříž</rdf:li>
    </rdf:Alt>
   </tiff:ImageDescription>
   <tiff:BitsPerSample>
    <rdf:Seq>
     <rdf:li>14</rdf:li>
    </rdf:Seq>
   </tiff:BitsPerSample>
   <dc:creator>
    <rdf:Seq>
     <rdf:li>Michal Thoma</rdf:li>
    </rdf:Seq>
   </dc:creator>
   <dc:rights>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">Michal Thoma</rdf:li>
    </rdf:Alt>
   </dc:rights>
   <dc:description>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">Hotel Pa?í?</rdf:li>
    </rdf:Alt>
   </dc:description>
  </rdf:Description>
 </rdf:RDF>
</x:xmpmeta>
Comment 1 caulier.gilles 2014-03-03 13:08:12 UTC
Created attachment 85398 [details]
Not reproducible with digiKam 4.0.0-beta3

Not reproducible with digiKam 4.0.0-beta3...
Comment 2 caulier.gilles 2014-03-03 13:10:46 UTC
See exiv2 output with file patched in Xmp.dc.description :

[gilles@localhost pipo]$ exiv2 -px DSC00303.JPG 
Xmp.tiff.Software                            XmpText    19  digiKam-4.0.0-beta4
Xmp.tiff.DateTime                            XmpText    19  2013-08-31T18:45:59
Xmp.tiff.Orientation                         XmpText     1  haut, gauche
Xmp.tiff.ImageWidth                          XmpText     4  6000
Xmp.tiff.ImageLength                         XmpText     4  4000
Xmp.tiff.ImageDescription                    LangAlt     1  lang="x-default" Hotel Paříž
Xmp.xmp.CreatorTool                          XmpText    19  digiKam-4.0.0-beta2
Xmp.xmp.Rating                               XmpText     1  0
Xmp.xmp.CreateDate                           XmpText    19  2013-08-31T18:45:59
Xmp.xmp.MetadataDate                         XmpText    19  2013-08-31T18:45:59
Xmp.xmp.ModifyDate                           XmpText    19  2013-08-31T18:45:59
Xmp.digiKam.PickLabel                        XmpText     1  0
Xmp.digiKam.ColorLabel                       XmpText     1  0
Xmp.digiKam.TagsList                         XmpSeq      2  Testtag 2, Testtag 3
Xmp.digiKam.CaptionsAuthorNames              LangAlt     1  lang="x-default" 
Xmp.digiKam.CaptionsDateTimeStamps           LangAlt     1  lang="x-default" 2014-03-03T13:54:48
Xmp.photoshop.Urgency                        XmpText     1  0
Xmp.photoshop.DateCreated                    XmpText    19  2013-08-31T18:45:59
Xmp.MicrosoftPhoto.Rating                    XmpText     1  0
Xmp.MicrosoftPhoto.LastKeywordXMP            XmpBag      2  Testtag 2, Testtag 3
Xmp.exif.DateTimeOriginal                    XmpText    19  2013:08:31 18:45:59
Xmp.exif.PixelXDimension                     XmpText     4  6000
Xmp.exif.PixelYDimension                     XmpText     4  4000
Xmp.exif.UserComment                         LangAlt     1  lang="x-default" Hotel Paříž
Xmp.dc.subject                               XmpBag      2  Testtag 2, Testtag 3
Xmp.dc.description                           LangAlt     1  lang="x-default" Hotel Paříž
Xmp.lr.hierarchicalSubject                   XmpBag      2  Testtag 2, Testtag 

Gilles Caulier
Comment 3 caulier.gilles 2014-03-03 13:13:47 UTC
XMP sidecar generated from patched file :

[gilles@localhost pipo]$ exiv2 -eX DSC00303.JPG 
[gilles@localhost pipo]$ cat DSC00303.xmp 
<?xml version="1.0" encoding="UTF-8"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 4.4.0-Exiv2">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:Description rdf:about=""
    xmlns:tiff="http://ns.adobe.com/tiff/1.0/"
    xmlns:xmp="http://ns.adobe.com/xap/1.0/"
    xmlns:digiKam="http://www.digikam.org/ns/1.0/"
    xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/"
    xmlns:MicrosoftPhoto="http://ns.microsoft.com/photo/1.0/"
    xmlns:lr="http://ns.adobe.com/lightroom/1.0/"
    xmlns:exif="http://ns.adobe.com/exif/1.0/"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
   tiff:ImageWidth="6000"
   tiff:ImageLength="4000"
   tiff:Orientation="1"
   tiff:YCbCrPositioning="2"
   tiff:XResolution="350/1"
   tiff:YResolution="350/1"
   tiff:ResolutionUnit="2"
   tiff:DateTime="2013-08-31T18:45:59"
   tiff:Make="SONY"
   tiff:Model="SLT-A77V"
   tiff:Software="SLT-A77V v1.07"
   xmp:CreatorTool="digiKam-4.0.0-beta2"
   xmp:Rating="0"
   xmp:CreateDate="2013-08-31T18:45:59"
   xmp:MetadataDate="2013-08-31T18:45:59"
   xmp:ModifyDate="2013-08-31T18:45:59"
   digiKam:PickLabel="0"
   digiKam:ColorLabel="0"
   photoshop:Urgency="0"
   photoshop:DateCreated="2013-08-31"
   MicrosoftPhoto:Rating="0"
   exif:ExifVersion="0230"
   exif:FlashpixVersion="0100"
   exif:ColorSpace="1"
   exif:CompressedBitsPerPixel="5/1"
   exif:PixelXDimension="6000"
   exif:PixelYDimension="4000"
   exif:DateTimeOriginal="2013-08-31T18:45:59"
   exif:DateTimeDigitized="2013-08-31T18:45:59"
   exif:ExposureTime="1/400"
   exif:FNumber="28/10"
   exif:ExposureProgram="3"
   exif:BrightnessValue="16674/2560"
   exif:ExposureBiasValue="0/10"
   exif:MaxApertureValue="760/256"
   exif:MeteringMode="5"
   exif:LightSource="0"
   exif:FocalLength="2000/10"
   exif:FileSource="3"
   exif:SceneType="1"
   exif:CustomRendered="0"
   exif:ExposureMode="0"
   exif:WhiteBalance="0"
   exif:FocalLengthIn35mmFilm="300"
   exif:SceneCaptureType="0"
   exif:Contrast="0"
   exif:Saturation="0"
   exif:Sharpness="0"
   exif:GPSVersionID="2.3.0.0"
   exif:GPSLatitude="43,35.1332500N"
   exif:GPSLongitude="5,31.1799167E"
   exif:GPSAltitudeRef="0"
   exif:GPSAltitude="37847/100"
   exif:GPSTimeStamp="2013-08-31T16:46:01.000000000"
   exif:GPSStatus="A"
   exif:GPSMeasureMode="3"
   exif:GPSDOP="12043/10000"
   exif:GPSSpeedRef="K"
   exif:GPSSpeed="700/1000"
   exif:GPSTrackRef="T"
   exif:GPSTrack="23597/100"
   exif:GPSMapDatum="WGS-84"
   exif:GPSDifferential="0">
   <tiff:ImageDescription>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">Hotel Paříž</rdf:li>
    </rdf:Alt>
   </tiff:ImageDescription>
   <digiKam:TagsList>
    <rdf:Seq>
     <rdf:li>Testtag 2</rdf:li>
     <rdf:li>Testtag 3</rdf:li>
    </rdf:Seq>
   </digiKam:TagsList>
   <digiKam:CaptionsAuthorNames>
    <rdf:Alt>
     <rdf:li xml:lang="x-default"/>
    </rdf:Alt>
   </digiKam:CaptionsAuthorNames>
   <digiKam:CaptionsDateTimeStamps>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">2014-03-03T13:54:48</rdf:li>
    </rdf:Alt>
   </digiKam:CaptionsDateTimeStamps>
   <MicrosoftPhoto:LastKeywordXMP>
    <rdf:Bag>
     <rdf:li>Testtag 2</rdf:li>
     <rdf:li>Testtag 3</rdf:li>
    </rdf:Bag>
   </MicrosoftPhoto:LastKeywordXMP>
   <lr:hierarchicalSubject>
    <rdf:Bag>
     <rdf:li>Testtag 2</rdf:li>
     <rdf:li>Testtag 3</rdf:li>
    </rdf:Bag>
   </lr:hierarchicalSubject>
   <exif:ComponentsConfiguration>
    <rdf:Seq>
     <rdf:li>1</rdf:li>
     <rdf:li>2</rdf:li>
     <rdf:li>3</rdf:li>
     <rdf:li>0</rdf:li>
    </rdf:Seq>
   </exif:ComponentsConfiguration>
   <exif:UserComment>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">Hotel Paříž</rdf:li>
    </rdf:Alt>
   </exif:UserComment>
   <exif:ISOSpeedRatings>
    <rdf:Seq>
     <rdf:li>100</rdf:li>
    </rdf:Seq>
   </exif:ISOSpeedRatings>
   <exif:Flash
    exif:Fired="False"
    exif:Return="0"
    exif:Mode="2"
    exif:Function="False"
    exif:RedEyeMode="False"/>
   <dc:subject>
    <rdf:Bag>
     <rdf:li>Testtag 2</rdf:li>
     <rdf:li>Testtag 3</rdf:li>
    </rdf:Bag>
   </dc:subject>
   <dc:description>
    <rdf:Alt>
     <rdf:li xml:lang="x-default">Hotel Paříž</rdf:li>
    </rdf:Alt>
   </dc:description>
  </rdf:Description>
 </rdf:RDF>
</x:xmpmeta>

As you can see, the Xmp.dc.description encoding is fine...

Gilles Caulier
Comment 4 Michal Thoma 2014-03-03 13:16:58 UTC
Very good, thank you!