(*

HTML TAG MERGER 1.0


A script for merging runs of two or more identical HTML tags.

© 1997 Phil Hudson. May be freely used and distributed. Comments and bug reports, please, to: phil.hudson@iname.com. Released 31 Mar 1997.

Note:

This script does not merge runs of identical nested HTML tag groups.
i.e. it will merge:

<FONT COLOR=RED>some</FONT> <FONT COLOR=RED>text</FONT>

into:

<FONT COLOR=RED>some text</FONT>

but it won't do anything to:

<H1><FONT COLOR=RED>some</FONT></H1> <H1><FONT COLOR=RED>text</FONT></H1>

Revision history:

1.0:
Initial release.
31 Mar 97.

*)
-- Delimiter that opens any HTML tag.
property startTag : "<"
-- Delimiter that opens a closing HTML tag. Note that it begins with startTag,
-- so the first startTag in a string is never later than the first endTag.
property endTag : "</"

-- Returns the part of someText that follows its first charCount characters,
-- or "" when nothing follows. Used so that no range reference ever starts
-- beyond the last character of the string.
on textAfterOffset(someText, charCount)
	if (charCount < (length of someText)) then
		return (text from character (charCount + 1) to -1 of someText)
	else
		return ""
	end if
end textAfterOffset

-- Merges runs of two or more identical, adjacent HTML tags.
--
-- A run is "<TAG ...>text</...>" followed, with nothing but white space
-- (space, tab, return, line feed) in between, by another "<TAG ...>" whose
-- body is identical. The intermediate "</...>" and "<TAG ...>" are dropped and
-- the white space between them is kept, e.g.
--   <B>some</B> <B>text</B>   becomes   <B>some text</B>
-- Tagged text that itself contains a tag (nested groups) is left untouched.
--
-- Parameter: someHTML -- the HTML text to process.
-- Returns:   the HTML text with such runs merged; everything else is copied
--            through unchanged.
on mergeTags(someHTML)
	
	set mergedHTML to ""
	set newLine to (ASCII character 10) -- loop-invariant, so computed once
	
	repeat
		
		set tagOff to (offset of startTag in someHTML)
		if (tagOff = 0) then -- no more "<...>" tags
			set mergedHTML to (mergedHTML & someHTML)
			exit repeat
		end if
		
		-- copy everything up to and including the "<"
		set mergedHTML to (mergedHTML & (text from character 1 to (tagOff + (length of startTag) - 1) of someHTML))
		set someHTML to textAfterOffset(someHTML, tagOff + (length of startTag) - 1)
		-- someHTML now starts with tag body
		
		set tagEndOff to (offset of ">" in someHTML)
		if (tagEndOff > 0) then -- end of tag found
			
			set tagBody to (text from character 1 to tagEndOff of someHTML) -- includes terminating ">"
			set mergedHTML to (mergedHTML & tagBody)
			set someHTML to textAfterOffset(someHTML, tagEndOff)
			-- someHTML now starts with the tagged text
			
			-- a closing tag can never be the start of a run
			if ((startTag & tagBody) does not start with endTag) then
				
				-- each pass merges one following identical tag into the current one
				repeat
					
					-- The first tag after the tagged text must be a closing tag. Because
					-- endTag begins with startTag, "closing tag comes first" shows up as
					-- the two offsets being equal, hence "<=" rather than "<".
					set endTagOff to (offset of endTag in someHTML)
					set nextTagOff to (offset of startTag in someHTML)
					if not ((endTagOff > 0) and ((nextTagOff = 0) or (endTagOff <= nextTagOff))) then exit repeat
					
					set taggedText to "" -- text covered by the current tag, excluding "</...>"
					if (endTagOff > 1) then set taggedText to (text from character 1 to (endTagOff - 1) of someHTML)
					
					-- find the ">" of the closing tag, looking only from the closing tag
					-- onward so a literal ">" in the tagged text is not mistaken for it
					set endTagOnward to (text from character endTagOff to -1 of someHTML)
					set endTagLen to (offset of ">" in endTagOnward)
					if (endTagLen = 0) then exit repeat
					
					-- NB this drops the "</...>" tag
					set restOfHTML to textAfterOffset(endTagOnward, endTagLen)
					-- restOfHTML now starts with text immediately after "</...>" tag
					
					set nextTagOff to (offset of startTag in restOfHTML)
					if (nextTagOff = 0) then exit repeat
					
					-- only white space may separate the two tags
					set whiteSpace to ""
					if (nextTagOff > 1) then
						set gap to (text from character 1 to (nextTagOff - 1) of restOfHTML)
						set allWhite to true
						repeat with i from 1 to (length of gap)
							set theChar to (character i of gap)
							if ((theChar is not space) and (theChar is not tab) and (theChar is not return) and (theChar is not newLine)) then
								set allWhite to false
								exit repeat
							end if
						end repeat
						if (not allWhite) then exit repeat
						set whiteSpace to gap
					end if
					
					-- is it the same tag?
					set foo to textAfterOffset(restOfHTML, nextTagOff + (length of startTag) - 1) -- foo now starts with body of tag
					
					set nextTagEndOff to (offset of ">" in foo)
					if (nextTagEndOff = 0) then exit repeat
					
					-- end of "<...>" tag found
					set nextTagBody to (text from character 1 to nextTagEndOff of foo)
					if (nextTagBody is not tagBody) then exit repeat -- different tag
					
					-- merge: keep the tagged text and the white space, drop "</...>" and "<...>"
					set mergedHTML to (mergedHTML & taggedText & whiteSpace)
					set someHTML to textAfterOffset(foo, nextTagEndOff)
					-- someHTML now starts with the text tagged by the merged tag; the
					-- closing-tag offset is recomputed at the top of the next pass
					
				end repeat
			end if
		end if
	end repeat
	return mergedHTML
end mergeTags



-- Top of page
-- Cool IT in Africa. Site design by Phil Hudson. Comments? Email phil.hudson@iname.com.
-- Top of page