]> git.stg.codes - stg.git/blob - doc/xslt/roundtrip/wordml2normalise.xsl
Added handy encoding functions.
[stg.git] / doc / xslt / roundtrip / wordml2normalise.xsl
1 <xsl:stylesheet version="1.0"
2   xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3   xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"
4   xmlns:v="urn:schemas-microsoft-com:vml"
5   xmlns:w10="urn:schemas-microsoft-com:office:word"
6   xmlns:sl="http://schemas.microsoft.com/schemaLibrary/2003/core"
7   xmlns:aml="http://schemas.microsoft.com/aml/2001/core"
8   xmlns:wx="http://schemas.microsoft.com/office/word/2003/auxHint"
9   xmlns:o="urn:schemas-microsoft-com:office:office"
10   xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
11   xmlns:dbk='http://docbook.org/ns/docbook'
12   xmlns:rnd='http://docbook.org/ns/docbook/roundtrip'
13   xmlns:xlink='http://www.w3.org/1999/xlink'
14   xmlns:exsl='http://exslt.org/common'
15   exclude-result-prefixes='w v w10 sl aml wx o dt'
16   extension-element-prefixes='exsl'>
17
18   <xsl:import href='normalise-common.xsl'/>
19
20   <xsl:output method='xml' indent="yes"/>
21
22   <!-- ********************************************************************
23        $Id: wordml2normalise.xsl 8105 2008-08-15 01:29:11Z balls $
24        ********************************************************************
25
26        This file is part of the XSL DocBook Stylesheet distribution.
27        See ../README or http://nwalsh.com/docbook/xsl/ for copyright
28        and other information.
29
30        ******************************************************************** -->
31
32   <!-- Whitespace-only text in the input is stripped everywhere except in w:t. -->
33   <xsl:strip-space elements="*"/>
34   <xsl:preserve-space elements="w:t"/>
35
36   <!-- Index w:style elements by their w:styleId attribute. -->
37   <xsl:key name="style" match="w:style" use="@w:styleId"/>
38
39   <!-- Document root: the content of w:body becomes a DocBook article;
40        nothing outside w:body is processed from here. -->
41   <xsl:template match='w:wordDocument'>
42     <dbk:article><xsl:apply-templates select='child::w:body'/></dbk:article>
43   </xsl:template>
44
45   <!-- These auxiliary hint elements produce no output. -->
46   <xsl:template match="wx:margin-left | wx:borders"/>
47
48   <!-- Paragraphs: emit a dbk:para tagged with the mapped paragraph style.
49        Paragraphs made up solely of tracked deletions, and content-free
50        paragraphs that only carry section properties, produce no output.
51     -->
52   <xsl:template match="w:p">
53     <xsl:variable name="word-style" select="w:pPr/w:pStyle/@w:val"/>
54     <xsl:variable name="style">
55       <xsl:call-template name="rnd:map-paragraph-style">
56         <xsl:with-param name="style" select="$word-style"/>
57       </xsl:call-template>
58     </xsl:variable>
59     <!-- At least one deletion annotation, and nothing else besides w:pPr. -->
60     <xsl:variable name="all-deleted"
61       select="aml:annotation[@w:type = 'Word.Deletion'] and
62               not(aml:annotation[@w:type != 'Word.Deletion']) and
63               count(*) = count(aml:annotation|w:pPr)"/>
64     <!-- No content, only section properties: a section or page break. -->
65     <xsl:variable name="break-only"
66       select="not(w:r|w:hlink|w:tbl) and w:pPr/w:sectPr"/>
67
68     <xsl:if test="not($all-deleted) and not($break-only)">
69       <dbk:para rnd:style="{$style}">
70         <xsl:if test="$word-style and $style != $word-style">
71           <xsl:attribute name="rnd:original-style">
72             <xsl:value-of select="$word-style"/>
73           </xsl:attribute>
74         </xsl:if>
75
76         <!-- An "attributes" run followed by a comment reference holds
77              name/value pairs that become attributes of the para. -->
78         <xsl:if test="w:r[1][w:rPr/w:rStyle/@w:val = 'attributes'] and
79                       w:r[2][w:rPr/w:rStyle/@w:val = 'CommentReference']">
80           <xsl:apply-templates mode="rnd:attributes"
81             select="w:r[2]//w:r[w:rPr/w:rStyle/@w:val = 'attribute-name']"/>
82         </xsl:if>
83
84         <xsl:apply-templates/>
85       </dbk:para>
86     </xsl:if>
87   </xsl:template>
88   <!-- Named after the matched element's w:t; valued by the next "attribute-value" run. -->
89   <xsl:template mode="rnd:attributes" match="*">
90     <xsl:attribute name="{w:t}">
91       <xsl:apply-templates mode="rnd:attribute-value"
92         select="following-sibling::w:r[w:rPr/w:rStyle/@w:val = 'attribute-value'][1]"/>
93     </xsl:attribute>
94   </xsl:template>
95
96   <!-- Text runs.
97        Skips attribute and comment-reference runs, extracts embedded pictures,
98        wraps sub/superscript runs, turns footnotes and endnotes into
99        dbk:footnote and marks direct formatting or a character style with
100        dbk:emphasis. The recursive call made inside a sub/superscript wrapper
101        passes $do-vert-align as false so the wrapper is not emitted again.
102     -->
103   <xsl:template match='w:r'>
104     <xsl:param name='do-vert-align' select='true()'/>
105
106     <!-- Direct formatting; bold and italic are tested before underline. -->
107     <xsl:variable name='role'>
108       <xsl:choose>
109         <xsl:when test='w:rPr/w:b and w:rPr/w:i'>bold-italic</xsl:when>
110         <xsl:when test='w:rPr/w:b'>bold</xsl:when>
111         <xsl:when test='w:rPr/w:i'>italic</xsl:when>
112         <xsl:when test='w:rPr/w:u'>underline</xsl:when>
113         <!-- TODO: add support for other styles -->
114       </xsl:choose>
115     </xsl:variable>
116     <!-- Character style name; empty when the run has none. -->
117     <xsl:variable name='style' select='string(w:rPr/w:rStyle/@w:val)'/>
118
119     <xsl:choose>
120       <xsl:when test='w:rPr/w:rStyle/@w:val = "attributes"'/>
121       <xsl:when test='w:rPr/w:rStyle/@w:val = "CommentReference"'/>
122       <xsl:when test='w:pict'>
123         <!-- "filename" is where the image data gets extracted to -->
124         <xsl:variable name='filename'>
125           <xsl:call-template name='rnd:image-filename'/>
126         </xsl:variable>
127         <!-- "target" is the URL that will be the target of the imagedata hyperlink.
128              This may or may not be related to the physical filename.
129           -->
130         <xsl:variable name='target'>
131           <xsl:call-template name='rnd:image-target'>
132             <xsl:with-param name='filename' select='$filename'/>
133           </xsl:call-template>
134         </xsl:variable>
135         <!-- "name:value;name:value" property list of the VML shape. -->
136         <xsl:variable name='shape-style' select='w:pict/v:shape/@style'/>
137
138         <xsl:call-template name='rnd:handle-image-data'>
139           <xsl:with-param name='filename' select='$filename'/>
140           <xsl:with-param name='data' select='w:pict/w:binData'/>
141         </xsl:call-template>
142
143         <dbk:inlinemediaobject>
144           <dbk:imageobject>
145             <dbk:imagedata fileref='{$target}'>
146               <xsl:if test='$shape-style'>
147                 <!-- Appending ";" before cutting at the first ";" reads a
148                      property correctly whether or not it is the last one. -->
149                 <xsl:attribute name='width'>
150                   <xsl:value-of select='normalize-space(substring-before(concat(substring-after($shape-style, "width:"), ";"), ";"))'/>
151                 </xsl:attribute>
152                 <xsl:attribute name='depth'>
153                   <xsl:value-of select='normalize-space(substring-before(concat(substring-after($shape-style, "height:"), ";"), ";"))'/>
154                 </xsl:attribute>
155               </xsl:if>
156             </dbk:imagedata>
157           </dbk:imageobject>
158         </dbk:inlinemediaobject>
159       </xsl:when>
160       <xsl:when test='$do-vert-align and
161                       w:rPr/w:vertAlign/@w:val = "subscript"'>
162         <dbk:subscript>
163           <xsl:apply-templates select='.'>
164             <xsl:with-param name='do-vert-align' select='false()'/>
165           </xsl:apply-templates>
166         </dbk:subscript>
167       </xsl:when>
168       <xsl:when test='$do-vert-align and
169                       w:rPr/w:vertAlign/@w:val = "superscript"'>
170         <dbk:superscript>
171           <xsl:apply-templates select='.'>
172             <xsl:with-param name='do-vert-align' select='false()'/>
173           </xsl:apply-templates>
174         </dbk:superscript>
175       </xsl:when>
176       <xsl:when test='w:endnoteRef and
177                       parent::w:p/parent::w:endnote and
178                       count(w:rPr|w:endnoteRef) = count(*)'/>
179       <xsl:when test='w:footnoteRef'/> <!-- is a label supplied? -->
180       <xsl:when test='w:footnote|w:endnote'>
181         <dbk:footnote>
182           <xsl:apply-templates select='w:footnote|w:endnote'/>
183         </dbk:footnote>
184       </xsl:when>
185       <xsl:when test='$role != "" or $style != ""'>
186         <dbk:emphasis>
187           <xsl:if test='$role != ""'>
188             <xsl:attribute name='role'>
189               <xsl:value-of select='$role'/>
190             </xsl:attribute>
191           </xsl:if>
192           <xsl:if test='$style != ""'>
193             <xsl:attribute name='rnd:style'>
194               <xsl:call-template name='rnd:map-character-style'>
195                 <xsl:with-param name='style' select='$style'/>
196               </xsl:call-template>
197             </xsl:attribute>
198           </xsl:if>
199           <xsl:apply-templates/>
200         </dbk:emphasis>
201       </xsl:when>
202       <xsl:otherwise>
203         <xsl:apply-templates/>
204       </xsl:otherwise>
205     </xsl:choose>
206   </xsl:template>
207
208   <!-- An application may wish to override these templates -->
209
210   <!-- rnd:image-filename: name of the physical file that will receive the
211        image data. When the binData name contains "wordml://", the text after
212        it is used; otherwise a numbered "imageN.jpg" name is generated.
213     -->
214   <xsl:template name="rnd:image-filename">
215     <xsl:param name="pict" select="w:pict"/>
216     <xsl:variable name="scheme" select="'wordml://'"/>
217     <xsl:variable name="bin-name" select="$pict/w:binData/@w:name"/>
218     <xsl:choose>
219       <xsl:when test="contains($bin-name, $scheme)">
220         <xsl:value-of select="substring-after($bin-name, $scheme)"/>
221       </xsl:when>
222       <xsl:otherwise>
223         <xsl:value-of select="concat('image', count($pict/preceding::w:pict) + 1, '.jpg')"/>
224       </xsl:otherwise>
225     </xsl:choose>
226   </xsl:template>
227
228   <!-- rnd:image-target: URL to reference the image by. This default simply
229        echoes the filename; $pict is accepted but not consulted here.
230     -->
231   <xsl:template name="rnd:image-target">
232     <xsl:param name="filename"/>
233     <xsl:param name="pict" select="w:pict"/>
234
235     <xsl:value-of select="string($filename)"/>
236   </xsl:template>
237
238   <!-- rnd:handle-image-data receives the base64-encoded data ($data) and a
239        filename for the physical file to which the data should be written.
240        Since XSLT cannot natively handle binary data, this implementation
241        just writes the undecoded data to "$filename.b64" (exsl:document only).
242        A real application would decode the data into a binary representation.
243     -->
244   <xsl:template name='rnd:handle-image-data'>
245     <xsl:param name='filename'/>
246     <xsl:param name='data'/>
247     <xsl:if test='element-available("exsl:document")'>
248       <exsl:document href='{$filename}.b64' method='text'>
249         <!-- Use the supplied data rather than re-reading the context node. -->
250         <xsl:value-of select='$data'/>
251       </exsl:document>
252     </xsl:if>
253   </xsl:template>
254   <!-- Hyperlinks: w:dest becomes the xlink:href of a dbk:link. -->
255   <xsl:template match="w:hlink">
256     <dbk:link xlink:href="{string(@w:dest)}">
257       <xsl:apply-templates select="node()"/>
258     </dbk:link>
259   </xsl:template>
260
261   <!-- DocBook has no real counterpart to a soft return (outside of
262        literal line environments), so a bare newline is emitted instead.
263     -->
264   <xsl:template match="w:br">
265     <xsl:value-of select="'&#10;'"/>
266   </xsl:template>
267   <!-- Tables.
268        The frame attribute is derived from the outer borders: an explicit
269        setting on the table or its named style decides; failing that, the
270        cell borders of the first row (last row for the bottom edge) are used.
271     -->
272   <xsl:template match='w:tbl'>
273     <xsl:variable name='tbl.style' select='key("style", w:tblPr/w:tblStyle/@w:val) | .'/>
274
275     <xsl:variable name='border.top'>
276       <xsl:choose>
277         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:top[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
278         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:top[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
279         <xsl:when test='w:tr[1]/w:tc[w:tcPr/w:tcBorders/w:top[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
280         <xsl:otherwise>0</xsl:otherwise>
281       </xsl:choose>
282     </xsl:variable>
283     <xsl:variable name='border.bottom'>
284       <xsl:choose>
285         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:bottom[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
286         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:bottom[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
287         <!-- The bottom edge of the table belongs to the last row. -->
288         <xsl:when test='w:tr[last()]/w:tc[w:tcPr/w:tcBorders/w:bottom[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
289         <xsl:otherwise>0</xsl:otherwise>
290       </xsl:choose>
291     </xsl:variable>
292     <xsl:variable name='border.left'>
293       <xsl:choose>
294         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:left[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
295         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:left[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
296         <xsl:when test='w:tr[1]/w:tc[w:tcPr/w:tcBorders/w:left[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
297         <xsl:otherwise>0</xsl:otherwise>
298       </xsl:choose>
299     </xsl:variable>
300     <xsl:variable name='border.right'>
301       <xsl:choose>
302         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:right[not(@w:val = "nil" or @w:val = "none")]'>1</xsl:when>
303         <xsl:when test='$tbl.style/w:tblPr/w:tblBorders/w:right[@w:val = "nil" or @w:val = "none"]'>0</xsl:when>
304         <xsl:when test='w:tr[1]/w:tc[w:tcPr/w:tcBorders/w:right[not(@w:val = "nil" or @w:val = "none")]]'>1</xsl:when>
305         <xsl:otherwise>0</xsl:otherwise>
306       </xsl:choose>
307     </xsl:variable>
308     <xsl:variable name='frame'>
309       <xsl:choose>
310         <xsl:when test='$border.top = "1" and $border.bottom = "1" and
311                         $border.left = "1" and $border.right = "1"'>all</xsl:when>
312         <xsl:when test='$border.top = "1" and $border.bottom = "1"'>topbot</xsl:when>
313         <xsl:when test='$border.left = "1" and $border.right = "1"'>sides</xsl:when>
314         <xsl:when test='$border.top = "1"'>top</xsl:when>
315         <xsl:when test='$border.bottom = "1"'>bottom</xsl:when>
316       </xsl:choose>
317     </xsl:variable>
318
319     <dbk:informaltable>
320       <!-- No frame attribute at all when none of the combinations applies. -->
321       <xsl:if test='$frame != ""'>
322         <xsl:attribute name='frame'>
323           <xsl:value-of select='$frame'/>
324         </xsl:attribute>
325       </xsl:if>
326       <!-- TODO: analyse column widths -->
327       <dbk:tgroup>
328         <xsl:apply-templates select='w:tblGrid'/>
329         <xsl:choose>
330           <xsl:when test='$tbl.style/w:tblStylePr[@w:type = "firstRow"]/w:trPr/w:tblHeader'>
331             <dbk:thead>
332               <xsl:apply-templates select='w:tr[1]'/>
333             </dbk:thead>
334             <dbk:tbody>
335               <xsl:apply-templates select='w:tr[position() != 1]'/>
336             </dbk:tbody>
337           </xsl:when>
338           <xsl:otherwise>
339             <dbk:tbody>
340               <xsl:apply-templates select='w:tr'/>
341             </dbk:tbody>
342           </xsl:otherwise>
343         </xsl:choose>
344       </dbk:tgroup>
345     </dbk:informaltable>
346   </xsl:template>
347   <xsl:template match="w:tblPr"/> <!-- read directly by the w:tbl and w:tc templates -->
348   <!-- One colspec per grid column: width "{w:w}*", name "column-N" (1-based). -->
349   <xsl:template match="w:tblGrid/w:gridCol">
350     <dbk:colspec colwidth="{@w:w}*" colname="column-{1 + count(preceding-sibling::w:gridCol)}"/>
351   </xsl:template>
352   <!-- Rows map one-to-one onto dbk:row; whatever the row contains is
353        handled by the templates for its children. -->
354   <xsl:template match="w:tr">
355     <dbk:row><xsl:apply-templates select="node()"/></dbk:row>
356   </xsl:template>
357   <!-- Table cells: rowsep/colsep from the table-wide inside borders or the
358        cell's own bottom/right border, namest/nameend for a horizontal span
359        and morerows for the start of a vertical merge. -->
360   <xsl:template match="w:tc">
361     <xsl:variable name="table" select="ancestor::w:tbl[1]"/>
362     <xsl:variable name="tbl.style"
363       select="$table | key('style', $table/w:tblPr/w:tblStyle/@w:val)"/>
364     <xsl:variable name="own-borders" select="w:tcPr/w:tcBorders"/>
365     <!-- Grid column this cell starts in: an earlier cell counts once, or
366          w:gridSpan/@w:val times when it declares a span. -->
367     <xsl:variable name="this.colnum"
368       select="count(preceding-sibling::w:tc) + 1 +
369               sum(preceding-sibling::w:tc/w:tcPr/w:gridSpan/@w:val) -
370               count(preceding-sibling::w:tc/w:tcPr/w:gridSpan[@w:val])"/>
371
372     <dbk:entry>
373       <xsl:if test="$tbl.style/w:tblPr/w:tblBorders/w:insideH[not(@w:val = 'nil' or @w:val = 'none')] or
374                     $own-borders/w:bottom[not(@w:val = 'nil' or @w:val = 'none')]">
375         <xsl:attribute name="rowsep">1</xsl:attribute>
376       </xsl:if>
377       <xsl:if test="$tbl.style/w:tblPr/w:tblBorders/w:insideV[not(@w:val = 'nil' or @w:val = 'none')] or
378                     $own-borders/w:right[not(@w:val = 'nil' or @w:val = 'none')]">
379         <xsl:attribute name="colsep">1</xsl:attribute>
380       </xsl:if>
381       <xsl:if test="w:tcPr/w:gridSpan[@w:val > 1]">
382         <xsl:attribute name="namest">
383           <xsl:value-of select="concat('column-', $this.colnum)"/>
384         </xsl:attribute>
385         <xsl:attribute name="nameend">
386           <xsl:value-of select="concat('column-', $this.colnum + w:tcPr/w:gridSpan/@w:val - 1)"/>
387         </xsl:attribute>
388       </xsl:if>
389       <xsl:if test="w:tcPr/w:vmerge[@w:val = 'restart']">
390         <xsl:attribute name="morerows">
391           <xsl:call-template name="rnd:count-rowspan">
392             <xsl:with-param name="row" select="../following-sibling::w:tr[1]"/>
393             <xsl:with-param name="colnum" select="$this.colnum"/>
394           </xsl:call-template>
395         </xsl:attribute>
396       </xsl:if>
397       <xsl:apply-templates/>
398     </dbk:entry>
399   </xsl:template>
400
401   <!-- No output for these: w:pStyle/w:rStyle are read directly by the w:p
402        and w:r templates; proofing marks and field data/instructions are
403        simply dropped. -->
404   <xsl:template
405     match="w:instrText | w:fldData | w:proofErr | w:rStyle | w:pStyle"/>
406
407   <!-- rnd:count-rowspan: how many consecutive rows, beginning with $row,
408        have at grid column $colnum a cell whose w:vmerge is not a "restart".
409        The w:tc template uses the result as the morerows value.
410     -->
411   <xsl:template name="rnd:count-rowspan">
412     <xsl:param name="row" select="/.."/>
413     <xsl:param name="colnum" select="0"/>
414     <xsl:variable name="continued-merge"
415       select="$row/w:tc[count(preceding-sibling::w:tc) + 1 +
416               sum(preceding-sibling::w:tc/w:tcPr/w:gridSpan/@w:val) -
417               count(preceding-sibling::w:tc/w:tcPr/w:gridSpan[@w:val]) = $colnum]
418               /w:tcPr/w:vmerge[not(@w:val = 'restart')]"/>
419
420     <xsl:choose>
421       <xsl:when test="$continued-merge">
422         <xsl:variable name="below">
423           <xsl:call-template name="rnd:count-rowspan">
424             <xsl:with-param name="colnum" select="$colnum"/>
425             <xsl:with-param name="row" select="$row/following-sibling::w:tr[1]"/>
426           </xsl:call-template>
427         </xsl:variable>
428         <xsl:value-of select="1 + $below"/>
429       </xsl:when>
430       <xsl:otherwise>0</xsl:otherwise>
431     </xsl:choose>
432   </xsl:template>
433
434   <xsl:template match="w:ftr | w:hdr"/> <!-- headers and footers produce no output -->
435
436   <!-- Annotations: a "Word.Deletion" annotation is dropped together with
437        its content; for any other annotation only the children are
438        processed. -->
439   <xsl:template match="aml:annotation">
440     <xsl:if test="not(@w:type = 'Word.Deletion')">
441       <xsl:apply-templates/>
442     </xsl:if>
443   </xsl:template>
444
445 </xsl:stylesheet>