// Example 1

import be.arci.html.*;
import java.io.File;
import java.util.Vector;
/**
 * Sample program: prints the hyperlinks found in HTML documents.
 * Links are read from the src attribute of IMG and FRAME tags, the href
 * attribute of A tags and the background attribute of BODY tags.
 */
public class HTMLScannerExample1
{
 /**
  * Scans each file named on the command line and writes every non-empty
  * link to standard output, one per line. A failure on one file is
  * reported with a stack trace and the remaining files are still processed.
  *
  * @param args paths of the HTML files to scan
  */
 public static void main(String[] args)
 {
  // Slot 0 is null because text content is not of interest. The id checks
  // further down use the positions of the names in this array.
  final String[] wantedNames = { null, "IMG", "A", "BODY", "FRAME" };

  for (int fileNo = 0; fileNo < args.length; fileNo++)
  {
   try
   {
    Vector links = new Vector();
    // For networked documents use "new HTMLScanner(new URL(args[fileNo]));" instead.
    HTMLScanner scanner = new HTMLScanner(new File(args[fileNo]));
    // true: discard tags we are not interested in
    HTMLTag[] found = scanner.getTags(wantedNames, true);

    for (int t = 0; t < found.length; t++)
    {
     HTMLTag tag = found[t];
     int id = tag.iID;
     String target;
     if (id == 1 || id == 4) // IMG or FRAME
      target = tag.getAttribute("src");
     else if (id == 2) // A
      target = tag.getAttribute("href");
     else if (id == 3) // BODY
      target = tag.getAttribute("background");
     else
      target = null;

     // Tags without the attribute, or with an empty value, yield no link.
     if (target == null || target.length() == 0)
      continue;
     links.addElement(target);
    }

    // Output happens only after the whole document has been walked.
    int shown = 0;
    while (shown < links.size())
    {
     System.out.println(links.elementAt(shown));
     shown++;
    }
   }
   catch (Exception e) // URL exceptions or IO exceptions
   {
    e.printStackTrace();
   }
  }
 }
}