爬虫获取数据时,可能会遇到通过 AJAX 加载的页面。如果无法分析出接口,就只能使用秘密武器——WebDriverDownloader。不过最好还是先分析出接口,因为 WebDriver 的性能实在太低了。
#region WebDriverDownloader

// Browser options handed to the WebDriverDownloader below.
// NOTE(review): the exact effect of each flag is defined by the Option type,
// which is not visible here — confirm against its documentation.
var option = new Option
{
    LoadImage = false,
    LoadFlashPlayer = false,
    AlwaysLoadNoFocusLibrary = false,
    Headless = true
};

// Chain of WebDriver actions; here it holds a single Click action.
List<IWebDriverAction> webDriverActions = new List<IWebDriverAction>
{
    new Click()
};

// NOTE(review): 5000 is presumably a wait time in milliseconds — confirm
// against the WebDriverDownloader constructor.
var downloader = new WebDriverDownloader(Browser.Chrome, 5000, option);

// Bind the actions to the browser; per the original author's note they are
// executed after the browser has finished loading the page.
downloader.Actions = webDriverActions;

spider.Downloader = downloader;

#endregion
/// <summary>
/// WebDriver action implementation named "Click". As written, each repetition
/// runs a JavaScript snippet that opens https://www.baidu.com/ via
/// <c>window.open</c>.
/// </summary>
public class Click : IWebDriverAction
{
    /// <summary>
    /// Number of times the action is repeated. Defaults to 1.
    /// </summary>
    public int ClickTimes { get; set; } = 1;

    /// <summary>
    /// Executes the action against the given WebDriver.
    /// </summary>
    /// <param name="webDriver">The WebDriver instance the script is executed on.</param>
    /// <returns>
    /// <c>true</c> if every repetition completed without throwing;
    /// <c>false</c> if any exception was raised.
    /// </returns>
    public bool Invoke(RemoteWebDriver webDriver)
    {
        try
        {
            // Optional, disabled in the original: webDriver.Manage().Window.Maximize();
            for (var i = 0; i < ClickTimes; i++)
            {
                // NOTE(review): fixed pause before the script, presumably to let
                // the page settle — confirm the required delay.
                Thread.Sleep(2000);

                // Inner quotes must be escaped; unescaped they terminate the
                // C# string literal and the code does not compile.
                string script = "window.open(\"https://www.baidu.com/\");";
                webDriver.ExecuteScript(script);

                // Pause after the script before the next repetition.
                Thread.Sleep(1000);
            }
        }
        catch (Exception)
        {
            // Failure is reported through the boolean result rather than
            // propagated to the caller.
            return false;
        }

        return true;
    }
}