Example: using xmlparser's SAX driver (xml/saxdriver) to parse huge XML files
// Streams a large Amazon product XML file through a SAX handler and pulls the ASIN and title out of each Item element.
#!/usr/bin/env ruby
#
# Streaming (SAX) import of a large Amazon product XML file.
#
# To run this, call run_amazon_import(datafile) with datafile = the base name
# of a file to parse, which is opened as:
#   "#{RAILS_ROOT}/data/<datafile>.xml"
#
# This is hard coded to look at Item elements. In this example it parses out
# ASIN as @@product_id and ItemAttributes/Title as @@name;
# see check_position_space(name, ch).
#
# NOTE(review): the parser state lives in class variables that are assigned at
# the top level. Ruby 3.0 and later raise RuntimeError for class-variable
# access from the top level, so this script presumably targets the legacy
# Ruby that xml/saxdriver and RAILS_ROOT imply -- confirm before running it
# on a modern interpreter.
require 'xml/saxdriver'

# Parser state shared between the helpers below and TestHandler.
@@finaldata = []      # one entry is appended for every closed Item element
@@vars = []           # not used anywhere in this example
@@positionSpace = ''  # names of the open elements below the current Item, concatenated
@@currentName = nil   # name of the most recently opened element
@@product_id = nil    # ASIN text of the current Item
@@name = nil          # ItemAttributes/Title text of the current Item

# Clears the values collected for the previous Item.
# Called by TestHandler#startElement whenever a new Item element opens.
def reset_vals
  @@product_id = nil
  @@name = nil
end

# Stores +ch+ in the matching class variable when the current position inside
# the Item is one of the values we are looking for.
#
# name - name of the most recently opened element (not used for matching).
# ch   - the text collected so far for the current run of character data.
#
# @@positionSpace is a concatenation of the element names between the Item and
# the current element, so Item/ItemAttributes/Title is 'ItemAttributesTitle'.
def check_position_space(name, ch)
  if @@positionSpace == 'ASIN'
    @@product_id = ch
  elsif @@positionSpace == 'ItemAttributesTitle'
    # If I did this again, I would build @@positionSpace with a / between the
    # names in startElement so it would be similar to other Ruby XML naming
    # schemes, i.e. 'ItemAttributesTitle' would be 'ItemAttributes/Title'.
    @@name = ch
  end
end

# SAX document handler that tracks the element path below each Item element
# and hands the character data it sees to check_position_space.
class TestHandler < XML::SAX::HandlerBase
  attr_accessor :data

  # Resets the per-document state.
  def startDocument
    @flag_item = false # true only while the parser is inside an Item element
    @text = ''         # contiguous character data seen since the last tag
    @@data = []
  end

  # Opens an element: a new Item restarts the path and clears the collected
  # values; any other element inside an Item is appended to the path.
  def startElement(name, attr)
    @text = ''
    if name == 'Item'
      @flag_item = true
      @@positionSpace = ''
      reset_vals
    elsif @flag_item
      @@positionSpace = @@positionSpace + name
    end
    @@currentName = name
  end

  # Closes an element: inside an Item the element's name is removed from the
  # end of the path; closing the Item itself records an entry in @@finaldata.
  def endElement(name)
    @text = ''
    if name == 'Item'
      # @@data is never filled by this example, so the entry recorded here is
      # just the string form of an empty array.
      @@finaldata << @@data.to_s
      @@data = []
      ## Here I would have a fully parsed Item and do something with it
      @flag_item = false
    elsif @flag_item
      @@positionSpace = @@positionSpace[0, @@positionSpace.length - name.length]
    end
  end

  # Receives character data. A parser may deliver one run of text in several
  # calls (expat documents this for its character data handler), so the chunks
  # are joined and the text collected so far is passed on. Assigning each
  # chunk directly would keep only the last piece of a split value.
  def characters(ch, start, length)
    @text = (@text || '') + ch[start, length].to_s
    check_position_space(@@currentName, @text)
  end
end

# Parses "#{RAILS_ROOT}/data/<datafile>.xml" with a TestHandler.
#
# datafile - base name (without directory or ".xml") of the file to parse.
#
# A SAXParseException is not re-raised; its system id, line number and message
# are printed with Kernel#p instead.
def run_amazon_import(datafile)
  @@datafile = datafile

  # Named parser rather than p, so the Kernel#p call in the rescue below is
  # not shadowed by a local variable of the same name.
  parser = XML::SAX::Helpers::ParserFactory.makeParser("XML::Parser::SAXDriver")
  handler = TestHandler.new
  parser.setDocumentHandler(handler)
  parser.setDTDHandler(handler)
  parser.setEntityResolver(handler)
  parser.setErrorHandler(handler)

  begin
    parser.parse("#{RAILS_ROOT}/data/#{datafile}.xml")
  rescue XML::SAX::SAXParseException => e
    p(["ParseError", e.getSystemId, e.getLineNumber, e.getMessage])
  end
end