(*
HTML TAG MERGER 1.0
A script for merging runs of two or more identical HTML tags.
© 1997 Phil Hudson. May be freely used and distributed. Comments
and bug reports please to:
Released 31 Mar 1997.
Note:
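- This script only merges runs of identical, un-nested HTML tags that are
  separated by nothing but white space; it does not merge runs of identical
  nested HTML tag groups.
- i.e. it will merge: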
<FONT COLOR=RED>some</FONT>
<FONT COLOR=RED>text</FONT>
into:
<FONT COLOR=RED>some text</FONT>
but it won't do anything to:
<H1><FONT COLOR=RED>some</FONT></H1> <H1><FONT COLOR=RED>text</FONT></H1>
Revision history:
- 1.0:
- Initial release.
31 Mar 97.
*)
-- Delimiters used to recognise tags. Note that startTag is a prefix of
-- endTag, so every end tag is also found by a search for startTag.
property startTag : "<"
property endTag : "</"

-- mergeTags(someHTML)
--
-- Returns a copy of someHTML in which every run of two or more identical,
-- un-nested elements separated only by white space is collapsed into a single
-- element. For each merged pair the closing tag of the first element and the
-- opening tag of the second are dropped; the white space that stood between
-- them is kept verbatim.
--   "<B>a</B> <B>b</B>"  -->  "<B>a b</B>"
--
-- Two elements are merged only when all of the following hold:
--   * the first element contains no other tag (its end tag is the next tag);
--   * the end tag names the same element as the opening tag;
--   * only space, tab, return or linefeed lies between the end tag and the
--     next opening tag;
--   * the next opening tag is identical to the first, attributes included
--     (AppleScript's default text comparison, so letter case is ignored).
-- Anything else is copied to the result unchanged.
--
-- someHTML: the text to process (may be empty).
-- Returns:  the merged text.
on mergeTags(someHTML)
	set mergedHTML to ""
	set newLine to (ASCII character 10)
	repeat
		set tagOff to (offset of startTag in someHTML)
		if (tagOff = 0) then
			-- no more "<...>" tags: the remainder is plain text
			set mergedHTML to (mergedHTML & someHTML)
			exit repeat
		end if
		
		-- copy everything up to and including the "<"
		set tagStartEnd to (tagOff + (length of startTag) - 1)
		set mergedHTML to (mergedHTML & (text 1 thru tagStartEnd of someHTML))
		set someHTML to my mergeTags_textAfter(someHTML, tagStartEnd)
		-- someHTML now starts with the tag body
		
		set tagEndOff to (offset of ">" in someHTML)
		if (tagEndOff > 0) then -- end of tag found
			set tagBody to (text 1 thru tagEndOff of someHTML) -- includes terminating ">"
			set mergedHTML to (mergedHTML & tagBody)
			set someHTML to my mergeTags_textAfter(someHTML, tagEndOff)
			-- someHTML now starts with the tagged text
			
			-- Only an opening tag can start a run; a closing tag ("/...>") is just copied.
			if (tagBody does not start with "/") then
				repeat
					-- Invariant: mergedHTML ends with the opening tag plus any text
					-- already merged; someHTML starts with the not-yet-copied text of
					-- the current element.
					set endTagOff to (offset of endTag in someHTML)
					if (endTagOff = 0) then exit repeat
					-- The end tag must be the very next tag. Because startTag is a
					-- prefix of endTag, that is the case exactly when the first
					-- startTag is not found before it.
					set nextTagOff to (offset of startTag in someHTML)
					if ((nextTagOff > 0) and (nextTagOff < endTagOff)) then exit repeat -- nested tag
					
					-- Locate the ">" that closes the end tag (searching only after "</",
					-- so a ">" inside the tagged text is not mistaken for it).
					set closeTail to my mergeTags_textAfter(someHTML, endTagOff + (length of endTag) - 1)
					set closeEndOff to (offset of ">" in closeTail)
					if (closeEndOff < 2) then exit repeat -- unterminated or empty "</>"
					
					-- The end tag must close the element that tagBody opened.
					set closeName to (text 1 thru (closeEndOff - 1) of closeTail)
					if (tagBody does not start with closeName) then exit repeat
					-- tagBody ends with ">" and closeName contains none, so tagBody is
					-- strictly longer than closeName and this index is always valid.
					set nameTerminator to (character ((length of closeName) + 1) of tagBody)
					if (nameTerminator is not in {">", space, tab, return, newLine}) then exit repeat
					
					-- restOfHTML starts with the text immediately after the "</...>" tag
					set restOfHTML to my mergeTags_textAfter(closeTail, closeEndOff)
					set nextTagOff to (offset of startTag in restOfHTML)
					if (nextTagOff = 0) then exit repeat
					
					-- Only white space may separate the two elements.
					set whiteSpace to ""
					if (nextTagOff > 1) then
						set whiteSpace to (text 1 thru (nextTagOff - 1) of restOfHTML)
						if (not (my mergeTags_isWhiteSpace(whiteSpace, newLine))) then exit repeat
					end if
					
					-- Is it the same tag?
					set nextTagTail to my mergeTags_textAfter(restOfHTML, nextTagOff + (length of startTag) - 1)
					set nextTagEndOff to (offset of ">" in nextTagTail)
					if (nextTagEndOff = 0) then exit repeat
					if ((text 1 thru nextTagEndOff of nextTagTail) is not tagBody) then exit repeat -- different tag
					
					-- Merge: keep the first element's text and the separating white
					-- space, drop "</...>" and the repeated opening tag.
					if (endTagOff > 1) then
						set mergedHTML to (mergedHTML & (text 1 thru (endTagOff - 1) of someHTML))
					end if
					set mergedHTML to (mergedHTML & whiteSpace)
					set someHTML to my mergeTags_textAfter(nextTagTail, nextTagEndOff)
					-- someHTML now starts with the text of the next element in the run;
					-- loop to see whether yet another identical element follows.
				end repeat
			end if
		end if
	end repeat
	return mergedHTML
end mergeTags

-- Helper for mergeTags: returns the part of theText that follows character
-- lastIndex, or "" when nothing follows (avoids an out-of-range text slice).
on mergeTags_textAfter(theText, lastIndex)
	if (lastIndex >= (length of theText)) then return ""
	return (text (lastIndex + 1) thru -1 of theText)
end mergeTags_textAfter

-- Helper for mergeTags: true when every character of theText is a space, tab,
-- return or the supplied linefeed character (also true for "").
on mergeTags_isWhiteSpace(theText, newLine)
	repeat with i from 1 to (length of theText)
		if ((character i of theText) is not in {space, tab, return, newLine}) then return false
	end repeat
	return true
end mergeTags_isWhiteSpace
|
|
|