// Example 2

import be.arci.html.*;
import java.io.File;
/**
 * Command-line example that extracts content from HTML documents.
 * Each program argument is opened as a local file, scanned with
 * {@code HTMLScanner}, and the accumulated content is printed to standard output.
 */
public class HTMLScannerExample2
{
 /**
  * Scans every file named in {@code args} and prints the content accumulated
  * from the tags returned by the scanner, one {@code println} per file.
  * A failure on one file prints a stack trace and processing continues
  * with the next argument.
  *
  * @param args paths of the HTML files to scan
  */
 public static void main(String[] args)
 {
  // A single empty tag name; per the original example's note this selects text content only.
  final String[] textOnly = { "" };
  for (String path : args)
  {
   try
   {
    // For networked documents, build the scanner with "new HTMLScanner(new URL(path))" instead.
    HTMLScanner scanner = new HTMLScanner(new File(path));
    // Second argument true: discard tags we are not interested in.
    HTMLTag[] found = scanner.getTags(textOnly, true);
    StringBuffer text = new StringBuffer();
    for (HTMLTag tag : found)
    {
     tag.accumulateContent(text);
    }
    System.out.println(text);
   }
   catch (Exception e)
   {
    // URL exceptions or IO exceptions: report the failure and move on to the next file.
    e.printStackTrace();
   }
  }
 }
}