/*
 * Spider.java
 *
 * Created on November 6, 2001, 10:29 AM
 */

import java.awt.*;                    // for basic events
import java.io.*;                     // for IOException
import java.net.*;                    // for URLs
import javax.swing.JOptionPane;       // for JOptionPane
import javax.swing.event.*;           // for HyperlinkListener
import javax.swing.text.*;            // for BadLocationException
import javax.swing.text.html.*;       // for HTMLEditorKit
import javax.swing.DefaultListModel;  // model for the JList
import javax.swing.ImageIcon;         // for ImageIcon
import java.util.*;                   // for HashSet

/**
 * A small Swing web "spider".
 *
 * <p>The frame shows an HTML page in an editor pane, lists the links
 * (anchors and frames) and images found on that page, and can crawl outward
 * from a start URL ("Get Pages"), collecting links that stay under the start
 * URL. Clicking a list entry displays it; right-clicking removes it.
 *
 * <p>All network access happens synchronously on the calling thread (the
 * event dispatch thread when triggered from the UI), so the window is
 * unresponsive while a page is being fetched.
 *
 * @author Administrator
 * @version
 */
public class Spider extends javax.swing.JFrame {

    /** Upper bound on the number of pages visited by one "Get Pages" crawl. */
    private static final int MAX_CRAWL_PAGES = 100;

    /** Value of the {@code constrained} flag: accept every link. */
    private static final int UNCONSTRAINED = 0;

    /** Value of the {@code constrained} flag: accept only links under the start URL. */
    private static final int CONSTRAINED = 1;

    /** Creates new form Spider */
    public Spider() {
        hs = new HashSet();
        hrefmodel = new DefaultListModel();
        imgmodel = new DefaultListModel();
        initComponents();
        pack();
    }

    /** This method is called from within the constructor to
     * initialize the form.
     * WARNING: Do NOT modify this code. The content of this method is
     * always regenerated by the FormEditor.
     */
    private void initComponents() {//GEN-BEGIN:initComponents
        jMenuBar1 = new javax.swing.JMenuBar();
        jMenu1 = new javax.swing.JMenu();
        jMenuItem1 = new javax.swing.JMenuItem();
        jMenuItem2 = new javax.swing.JMenuItem();
        jMIExit = new javax.swing.JMenuItem();
        jMenu2 = new javax.swing.JMenu();
        jMenuItem4 = new javax.swing.JMenuItem();
        jMenuItem5 = new javax.swing.JMenuItem();
        jMenuItem6 = new javax.swing.JMenuItem();
        jMenu3 = new javax.swing.JMenu();
        jMenuItem7 = new javax.swing.JMenuItem();
        jMenuItem8 = new javax.swing.JMenuItem();
        jSPPage = new javax.swing.JScrollPane();
        jEPDisplay = new javax.swing.JEditorPane();
        jSPImage = new javax.swing.JScrollPane();
        jLabGraphic = new javax.swing.JLabel();
        jPanel1 = new javax.swing.JPanel();
        jPanel3 = new javax.swing.JPanel();
        jBGetPage = new javax.swing.JButton();
        jTFGURL = new javax.swing.JTextField();
        jBGetPages = new javax.swing.JButton();
        jPanel2 = new javax.swing.JPanel();
        jBClearImg = new javax.swing.JButton();
        jBClearDocs = new javax.swing.JButton();
        jPanel4 = new javax.swing.JPanel();
        jPanel5 = new javax.swing.JPanel();
        jLabel3 = new javax.swing.JLabel();
        jTFCURL = new javax.swing.JTextField();
        jPanel6 = new javax.swing.JPanel();
        jPanel7 = new javax.swing.JPanel();
        jBDImage = new javax.swing.JButton();
        jBDPage = new javax.swing.JButton();
        jSPHREF = new javax.swing.JScrollPane();
        jLHREF = new javax.swing.JList();
        jSPImageList = new javax.swing.JScrollPane();
        jLImage = new javax.swing.JList();

        jMenu1.setText("Menu");
        jMenuItem1.setText("Item");
        jMenu1.add(jMenuItem1);
        jMenuItem2.setText("Item");
        jMenu1.add(jMenuItem2);
        jMIExit.setText("Exit");
        jMIExit.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                jMIExit(evt);
            }
        }
        );
        jMenu1.add(jMIExit);
        jMenuBar1.add(jMenu1);

        jMenu2.setText("Menu");
        jMenuItem4.setText("Item");
        jMenu2.add(jMenuItem4);
        jMenuItem5.setText("Item");
        jMenu2.add(jMenuItem5);
        jMenuItem6.setText("Item");
        jMenu2.add(jMenuItem6);
        jMenuBar1.add(jMenu2);

        jMenu3.setText("Menu");
        jMenuItem7.setText("Item");
        jMenu3.add(jMenuItem7);
        jMenuItem8.setText("Item");
        jMenu3.add(jMenuItem8);
        jMenuBar1.add(jMenu3);

        getContentPane().setLayout(new java.awt.GridBagLayout());
        java.awt.GridBagConstraints gridBagConstraints1;
        addWindowListener(new java.awt.event.WindowAdapter() {
            public void windowClosing(java.awt.event.WindowEvent evt) {
                exitForm(evt);
            }
        }
        );

        jEPDisplay.setMinimumSize(new java.awt.Dimension(500, 500));
        jEPDisplay.setEditable(false);
        jEPDisplay.addHyperlinkListener(new javax.swing.event.HyperlinkListener() {
            public void hyperlinkUpdate(javax.swing.event.HyperlinkEvent evt) {
                jEPDisplayHyperlinkUpdate(evt);
            }
        }
        );
        jSPPage.setViewportView(jEPDisplay);

        gridBagConstraints1 = new java.awt.GridBagConstraints();
        gridBagConstraints1.gridx = 0;
        gridBagConstraints1.gridy = 1;
        gridBagConstraints1.gridheight = 3;
        gridBagConstraints1.fill = java.awt.GridBagConstraints.BOTH;
        gridBagConstraints1.weightx = 10.0;
        gridBagConstraints1.weighty = 10.0;
        getContentPane().add(jSPPage, gridBagConstraints1);

        jSPImage.setPreferredSize(new java.awt.Dimension(200, 200));
        jLabGraphic.setPreferredSize(new java.awt.Dimension(300, 400));
        jLabGraphic.setBackground(java.awt.Color.white);
        jLabGraphic.setMinimumSize(new java.awt.Dimension(200, 300));
        jLabGraphic.setHorizontalAlignment(javax.swing.SwingConstants.LEFT);
        jLabGraphic.setVerticalAlignment(javax.swing.SwingConstants.TOP);
        jSPImage.setViewportView(jLabGraphic);

        gridBagConstraints1 = new java.awt.GridBagConstraints();
        gridBagConstraints1.gridx = 1;
        gridBagConstraints1.gridy = 1;
        gridBagConstraints1.fill = java.awt.GridBagConstraints.BOTH;
        gridBagConstraints1.weightx = 2.0;
        gridBagConstraints1.weighty = 1.0;
        getContentPane().add(jSPImage, gridBagConstraints1);

        jPanel1.setLayout(new javax.swing.BoxLayout(jPanel1, 0));
        jPanel3.setLayout(new javax.swing.BoxLayout(jPanel3, 0));
        jBGetPage.setText("Get Page");
        jBGetPage.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                jBGetPage(evt);
            }
        }
        );
        jPanel3.add(jBGetPage);

        jTFGURL.setText("http://www2.sis.pitt.edu/");
        jPanel3.add(jTFGURL);

        jBGetPages.setText("Get Pages");
        jBGetPages.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                jBGetPagesActionPerformed(evt);
            }
        }
        );
        jPanel3.add(jBGetPages);
        jPanel1.add(jPanel3);

        jBClearImg.setText("Clear Images");
        jBClearImg.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                jButton1ActionPerformed(evt);
            }
        }
        );
        jPanel2.add(jBClearImg);

        jBClearDocs.setText("Clear Docs");
        jBClearDocs.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                jButton3ActionPerformed(evt);
            }
        }
        );
        jPanel2.add(jBClearDocs);
        jPanel1.add(jPanel2);

        gridBagConstraints1 = new java.awt.GridBagConstraints();
        gridBagConstraints1.gridx = 0;
        gridBagConstraints1.gridy = 4;
        gridBagConstraints1.gridwidth = 2;
        gridBagConstraints1.fill = java.awt.GridBagConstraints.HORIZONTAL;
        getContentPane().add(jPanel1, gridBagConstraints1);

        jPanel4.setLayout(new javax.swing.BoxLayout(jPanel4, 0));
        jPanel5.setLayout(new javax.swing.BoxLayout(jPanel5, 0));
        jLabel3.setText("Current URL");
        jPanel5.add(jLabel3);

        jTFCURL.setPreferredSize(new java.awt.Dimension(200, 20));
        jTFCURL.setMinimumSize(new java.awt.Dimension(200, 20));
        jPanel5.add(jTFCURL);
        jPanel4.add(jPanel5);

        jPanel6.setLayout(new javax.swing.BoxLayout(jPanel6, 0));
        jPanel4.add(jPanel6);

        jPanel7.setLayout(new javax.swing.BoxLayout(jPanel7, 0));
        jPanel7.setMaximumSize(new java.awt.Dimension(300, 35));
        jBDImage.setText("Display Image");
        jPanel7.add(jBDImage);
        jBDPage.setText("Display Page");
        jPanel7.add(jBDPage);
        jPanel4.add(jPanel7);

        gridBagConstraints1 = new java.awt.GridBagConstraints();
        gridBagConstraints1.gridx = 0;
        gridBagConstraints1.gridy = 0;
        gridBagConstraints1.gridwidth = 2;
        gridBagConstraints1.fill = java.awt.GridBagConstraints.HORIZONTAL;
        getContentPane().add(jPanel4, gridBagConstraints1);

        jLHREF.setValueIsAdjusting(true);
        jLHREF.setModel(hrefmodel);
        jLHREF.addMouseListener(new java.awt.event.MouseAdapter() {
            public void mouseClicked(java.awt.event.MouseEvent evt) {
                jLHREFMouseClicked(evt);
            }
        }
        );
        jSPHREF.setViewportView(jLHREF);

        gridBagConstraints1 = new java.awt.GridBagConstraints();
        gridBagConstraints1.gridx = 1;
        gridBagConstraints1.gridy = 3;
        gridBagConstraints1.fill = java.awt.GridBagConstraints.BOTH;
        gridBagConstraints1.weightx = 2.0;
        gridBagConstraints1.weighty = 1.0;
        getContentPane().add(jSPHREF, gridBagConstraints1);

        jLImage.setValueIsAdjusting(true);
        jLImage.setModel(imgmodel);
        jLImage.addMouseListener(new java.awt.event.MouseAdapter() {
            public void mouseClicked(java.awt.event.MouseEvent evt) {
                jLImageMouseClicked(evt);
            }
        }
        );
        jSPImageList.setViewportView(jLImage);

        gridBagConstraints1 = new java.awt.GridBagConstraints();
        gridBagConstraints1.gridx = 1;
        gridBagConstraints1.gridy = 2;
        gridBagConstraints1.fill = java.awt.GridBagConstraints.BOTH;
        gridBagConstraints1.weightx = 2.0;
        gridBagConstraints1.weighty = 1.0;
        getContentPane().add(jSPImageList, gridBagConstraints1);

        setJMenuBar(jMenuBar1);
    }//GEN-END:initComponents

    /** "Clear Docs": empties the link list and forgets those URLs. */
    private void jButton3ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton3ActionPerformed
        clearModel(hrefmodel);
    }//GEN-LAST:event_jButton3ActionPerformed

    /** "Clear Images": empties the image list and forgets those URLs. */
    private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed
        clearModel(imgmodel);
    }//GEN-LAST:event_jButton1ActionPerformed

    /**
     * "Get Pages": crawls outward from the URL in the start-URL field.
     *
     * <p>The link list doubles as the work queue: each visited page appends
     * the links it contains (restricted to URLs under the start URL), and the
     * loop walks that growing list. The crawl stops when the list is
     * exhausted or after {@link #MAX_CRAWL_PAGES} pages. Pages are parsed for
     * links only; they are not rendered in the display pane.
     */
    private void jBGetPagesActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBGetPagesActionPerformed
        String startUrl = jTFGURL.getText();
        clearModel(hrefmodel);
        // Register the start page as seen so links back to it are not queued again.
        hs.add(startUrl);
        hrefmodel.addElement(startUrl);

        // The size is re-read on every pass because myGetLinks appends to the model.
        for (PageIndex = 0;
                PageIndex < MAX_CRAWL_PAGES && PageIndex < hrefmodel.getSize();
                PageIndex++) {
            String s = (String) hrefmodel.getElementAt(PageIndex);
            jLHREF.setSelectedIndex(PageIndex);
            jTFCURL.setText(s);
            myGetLinks(s, CONSTRAINED);
            System.out.println("Page Index = " + PageIndex);
        }
    }//GEN-LAST:event_jBGetPagesActionPerformed

    /**
     * Image list click: right-click removes the entry under the pointer,
     * any other click loads the selected image into the preview label.
     */
    private void jLImageMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jLImageMouseClicked
        if (javax.swing.SwingUtilities.isRightMouseButton(evt)) {
            removeClickedEntry(jLImage, imgmodel, evt);
            return;
        }
        String s = (String) jLImage.getSelectedValue();
        if (s == null) {
            return; // nothing selected (e.g. click on an empty list)
        }
        jTFCURL.setText(s);
        myGetImage(s);
    }//GEN-LAST:event_jLImageMouseClicked

    private void jTFURLActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jTFURLActionPerformed
        // Add your handling code here:
    }//GEN-LAST:event_jTFURLActionPerformed

    /**
     * Link list click: right-click removes the entry under the pointer,
     * any other click displays the selected page and lists its links.
     */
    private void jLHREFMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jLHREFMouseClicked
        if (javax.swing.SwingUtilities.isRightMouseButton(evt)) {
            removeClickedEntry(jLHREF, hrefmodel, evt);
            return;
        }
        String s = (String) jLHREF.getSelectedValue();
        if (s == null) {
            return; // nothing selected (e.g. click on an empty list)
        }
        showPageAndLinks(s);
    }//GEN-LAST:event_jLHREFMouseClicked

    /** Follows a hyperlink activated inside the display pane. */
    private void jEPDisplayHyperlinkUpdate(javax.swing.event.HyperlinkEvent evt) {//GEN-FIRST:event_jEPDisplayHyperlinkUpdate
        if (evt.getEventType() != HyperlinkEvent.EventType.ACTIVATED) {
            return;
        }
        URL target = evt.getURL();
        if (target == null) {
            return; // the link could not be turned into a URL
        }
        showPageAndLinks(target.toString());
    }//GEN-LAST:event_jEPDisplayHyperlinkUpdate

    /** "Get Page": displays the start URL and lists its links and images. */
    private void jBGetPage(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBGetPage
        showPageAndLinks(jTFGURL.getText());
    }//GEN-LAST:event_jBGetPage

    private void jMIExit(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMIExit
        System.exit(0);
    }//GEN-LAST:event_jMIExit

    /** Exit the Application */
    private void exitForm(java.awt.event.WindowEvent evt) {//GEN-FIRST:event_exitForm
        System.exit(0);
    }//GEN-LAST:event_exitForm

    /*
     * Local helper functions.
     */

    /** Shows {@code url} in the display pane and collects its links without restriction. */
    private void showPageAndLinks(String url) {
        jTFCURL.setText(url);
        myGetPage(url);
        myGetLinks(url, UNCONSTRAINED);
    }

    /**
     * Empties a list model and removes its entries from the seen-set, so the
     * same URLs can be listed again later.
     */
    private void clearModel(DefaultListModel model) {
        for (int i = 0; i < model.getSize(); i++) {
            hs.remove(model.getElementAt(i));
        }
        model.removeAllElements();
    }

    /**
     * Removes the list entry under the mouse pointer from both the model and
     * the seen-set. Does nothing when the list is empty.
     */
    private void removeClickedEntry(javax.swing.JList list, DefaultListModel model,
            java.awt.event.MouseEvent evt) {
        // locationToIndex returns -1 when the model is empty.
        int index = list.locationToIndex(evt.getPoint());
        if (index < 0 || index >= model.getSize()) {
            return;
        }
        list.setSelectedIndex(index);
        hs.remove(model.getElementAt(index));
        model.removeElementAt(index);
    }

    /**
     * Loads a page into the display pane, showing an error dialog when it
     * cannot be retrieved, and records it as the current URL.
     *
     * @param SURL the URL of the page to show
     */
    private void myGetPage(String SURL) {
        setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
        try {
            jEPDisplay.setPage(SURL);
        } catch (IOException io) {
            JOptionPane.showMessageDialog(this,
                    "Error retrieving specified URL",
                    "Bad URL",
                    JOptionPane.ERROR_MESSAGE);
        } finally {
            // Always restore the cursor, even on an unexpected runtime failure.
            setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
        jTFCURL.setText(SURL);
    }

    /**
     * Loads an image into the preview label.
     *
     * @param SURL the URL of the image to show
     */
    private void myGetImage(String SURL) {
        setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
        try {
            jLabGraphic.setIcon(new ImageIcon(new URL(SURL)));
        } catch (MalformedURLException mfe) {
            System.out.println(mfe);
        } finally {
            setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }

    /**
     * Parses the page at {@code SURL} and adds the images, frames and anchors
     * it references to the image and link lists.
     *
     * @param SURL        the URL of the page to parse
     * @param constrained 1 to keep only URLs that start with the text of the
     *                    start-URL field, any other value to keep all URLs
     */
    private void myGetLinks(String SURL, int constrained) {
        setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
        try {
            HTMLDocument htmlDoc = loadDocument(SURL);

            // Prefer the document's own base; otherwise derive one from the page URL.
            URL base = htmlDoc.getBase();
            String sbase = (base != null) ? base.toString() : fabricatebase(SURL);

            ElementIterator it = new ElementIterator(htmlDoc);
            javax.swing.text.Element elem;
            while ((elem = it.next()) != null) {
                String tagName = elem.getName();
                if (tagName.equalsIgnoreCase("img")) {
                    String src = stringAttribute(elem.getAttributes(), HTML.Attribute.SRC);
                    if (src != null) {
                        addelement(sbase, src, imgmodel, constrained);
                    }
                }
                if (tagName.equalsIgnoreCase("frame")) {
                    String src = stringAttribute(elem.getAttributes(), HTML.Attribute.SRC);
                    if (src != null) {
                        addelement(sbase, src, hrefmodel, constrained);
                    }
                }
            }

            HTMLDocument.Iterator anchors = htmlDoc.getIterator(HTML.Tag.A);
            while (anchors.isValid()) {
                String href = stringAttribute(anchors.getAttributes(), HTML.Attribute.HREF);
                if (href != null) {
                    addelement(sbase, href, hrefmodel, constrained);
                }
                anchors.next();
            }
        } finally {
            setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }

    /**
     * Fetches and parses a page. Failures are reported on the console and
     * yield whatever was parsed so far (possibly an empty document).
     */
    private HTMLDocument loadDocument(String SURL) {
        HTMLEditorKit htmlEditorKit = new HTMLEditorKit();
        HTMLDocument htmlDoc = new HTMLDocument();
        Reader htmlReader = null;
        try {
            URL myurl = new URL(SURL);
            htmlReader = new InputStreamReader(myurl.openConnection().getInputStream());
            htmlEditorKit.read(htmlReader, htmlDoc, 0);
        } catch (ChangedCharSetException e) {
            // The page declares a charset in a <meta> tag, which aborts the
            // parse. Start over with a fresh document that ignores it.
            System.out.println("changed char set: " + e.getCharSetSpec());
            htmlDoc = reparseIgnoringCharset(htmlEditorKit, SURL);
        } catch (BadLocationException e) {
            System.err.println("bloc" + e);
        } catch (MalformedURLException murle) {
            System.err.println("MURL" + murle);
        } catch (IOException e) {
            System.err.println("IOE" + e);
        } finally {
            closeQuietly(htmlReader);
        }
        return htmlDoc;
    }

    /**
     * Re-fetches a page as text and parses it into a new document with the
     * "IgnoreCharsetDirective" property set, so a charset declaration in the
     * page no longer interrupts parsing.
     */
    private HTMLDocument reparseIgnoringCharset(HTMLEditorKit kit, String SURL) {
        HTMLDocument doc = new HTMLDocument();
        doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
        Reader reader = new StringReader(fetchURL(SURL));
        try {
            kit.read(reader, doc, 0);
        } catch (IOException ie) {
            System.err.println("iread" + ie);
        } catch (BadLocationException ie) {
            System.err.println("iloc" + ie);
        }
        return doc;
    }

    /** Returns the attribute value when it is a string, otherwise null. */
    private static String stringAttribute(AttributeSet attrs, Object key) {
        if (attrs == null) {
            return null;
        }
        Object value = attrs.getAttribute(key);
        return (value instanceof String) ? (String) value : null;
    }

    /** Closes a reader, ignoring a null argument and any close failure. */
    private static void closeQuietly(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }

    /**
     * Derives a base ("directory") URL from a page URL by cutting everything
     * after the last slash of the path. Query and fragment are dropped first.
     * A URL without any path, such as {@code http://host}, gets a trailing
     * slash instead of being cut inside the {@code ://} separator.
     *
     * @param s the page URL; may be null
     * @return the base URL ending in '/', or "" when none can be derived
     */
    private static String fabricatebase(String s) {
        if (s == null || s.length() == 0) {
            return "";
        }
        String url = s;
        int cut = url.indexOf('#');
        if (cut >= 0) {
            url = url.substring(0, cut);
        }
        cut = url.indexOf('?');
        if (cut >= 0) {
            url = url.substring(0, cut);
        }

        int schemeEnd = url.indexOf("://");
        int authorityStart = (schemeEnd < 0) ? 0 : schemeEnd + 3;
        int lastSlash = url.lastIndexOf('/');
        if (lastSlash < authorityStart) {
            // No path at all: "http://host" -> "http://host/"; a bare word has no base.
            return (schemeEnd >= 0) ? url + "/" : "";
        }
        return url.substring(0, lastSlash + 1);
    }

    /**
     * Turns a link found in a page into an absolute URL.
     *
     * <p>The fragment and query string are removed. Links to PDF, PostScript
     * and XML files, {@code mailto:} links, empty links and links that cannot
     * be resolved to a URL are rejected.
     *
     * @param base the base URL of the page the link came from; may be null
     * @param link the raw link text
     * @return the absolute URL, or null when the link should be skipped
     */
    private static String resolveUrl(String base, String link) {
        String s = link.trim();
        int cut = s.indexOf('#');
        if (cut >= 0) {
            s = s.substring(0, cut);
        }
        cut = s.indexOf('?');
        if (cut >= 0) {
            s = s.substring(0, cut);
        }
        if (s.length() == 0) {
            return null; // pure same-page anchor or query
        }

        // Match on the extension/scheme only, so hosts like www.psu.edu are kept.
        String lower = s.toLowerCase(Locale.ENGLISH);
        if (lower.startsWith("mailto:")
                || lower.endsWith(".pdf")
                || lower.endsWith(".ps")
                || lower.endsWith(".xml")) {
            return null;
        }

        URL context = null;
        if (base != null) {
            try {
                context = new URL(base.trim());
            } catch (MalformedURLException ignored) {
                // An absolute link can still be resolved without a usable base.
            }
        }
        try {
            return new URL(context, s).toString();
        } catch (MalformedURLException e) {
            return null;
        }
    }

    /**
     * Adds a link to a list model unless it is filtered out or already known.
     *
     * @param base        base URL used to resolve relative links
     * @param in          the raw link text from the page
     * @param model       the list model to append to
     * @param constrained 1 to accept only URLs that start with the text of
     *                    the start-URL field
     */
    private void addelement(String base, String in, DefaultListModel model, int constrained) {
        String absurl = resolveUrl(base, in);
        if (absurl == null) {
            return;
        }
        if (constrained == CONSTRAINED && !absurl.startsWith(jTFGURL.getText())) {
            return;
        }
        // HashSet.add reports whether the URL was new.
        if (hs.add(absurl)) {
            model.addElement(absurl);
        }
    }

    /**
     * Downloads a URL as text using the platform default charset. On failure
     * the returned text contains a short error line instead of page content.
     *
     * @param urlString the URL to download
     * @return the downloaded text, one line per source line
     */
    private static String fetchURL(String urlString) {
        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw);
        BufferedReader in = null;
        try {
            URL url = new URL(urlString);
            in = new BufferedReader(new InputStreamReader(url.openStream()));
            String line;
            while ((line = in.readLine()) != null) {
                pw.println(line);
            }
        } catch (MalformedURLException e) {
            pw.println("Invalid URL");
        } catch (IOException e) {
            pw.println("Error reading URL");
        } finally {
            closeQuietly(in);
            pw.close();
        }
        return sw.toString();
    }// end of fetchURL

    /**
     * @param args the command line arguments
     */
    public static void main(String args[]) {
        // Swing components must be created on the event dispatch thread.
        java.awt.EventQueue.invokeLater(new Runnable() {
            public void run() {
                new Spider().setVisible(true);
            }
        });
    }

    // Variables declaration - do not modify//GEN-BEGIN:variables
    private javax.swing.JMenuBar jMenuBar1;
    private javax.swing.JMenu jMenu1;
    private javax.swing.JMenuItem jMenuItem1;
    private javax.swing.JMenuItem jMenuItem2;
    private javax.swing.JMenuItem jMIExit;
    private javax.swing.JMenu jMenu2;
    private javax.swing.JMenuItem jMenuItem4;
    private javax.swing.JMenuItem jMenuItem5;
    private javax.swing.JMenuItem jMenuItem6;
    private javax.swing.JMenu jMenu3;
    private javax.swing.JMenuItem jMenuItem7;
    private javax.swing.JMenuItem jMenuItem8;
    private javax.swing.JScrollPane jSPPage;
    private javax.swing.JEditorPane jEPDisplay;
    private javax.swing.JScrollPane jSPImage;
    private javax.swing.JLabel jLabGraphic;
    private javax.swing.JPanel jPanel1;
    private javax.swing.JPanel jPanel3;
    private javax.swing.JButton jBGetPage;
    private javax.swing.JTextField jTFGURL;
    private javax.swing.JButton jBGetPages;
    private javax.swing.JPanel jPanel2;
    private javax.swing.JButton jBClearImg;
    private javax.swing.JButton jBClearDocs;
    private javax.swing.JPanel jPanel4;
    private javax.swing.JPanel jPanel5;
    private javax.swing.JLabel jLabel3;
    private javax.swing.JTextField jTFCURL;
    private javax.swing.JPanel jPanel6;
    private javax.swing.JPanel jPanel7;
    private javax.swing.JButton jBDImage;
    private javax.swing.JButton jBDPage;
    private javax.swing.JScrollPane jSPHREF;
    private javax.swing.JList jLHREF;
    private javax.swing.JScrollPane jSPImageList;
    private javax.swing.JList jLImage;
    // End of variables declaration//GEN-END:variables

    // Beginning of my global variables

    /** URLs currently shown in either list; used to suppress duplicates. */
    private HashSet hs;
    /** Model behind the link list; also the work queue of a crawl. */
    private DefaultListModel hrefmodel;
    /** Model behind the image list. */
    private DefaultListModel imgmodel;
    /** Position of the crawl within the link list. */
    private int PageIndex;
}