Overview
This Jsoup example demonstrates how to select and iterate over all the elements of an HTML document.
Jsoup provides a select method that accepts CSS-style selectors for choosing elements.
To select every element of an HTML page, pass “*” as the selector.
The “*” selector matches every element of the HTML document. You can then iterate over the matched elements using a for loop.
Step 1 : HTML Source Code
We will scrape data from the web page http://www.yudiz.com/blog/.
Author Name HTML Code:-
<span class="vcard author post-author test"> <a href="http://www.yudiz.com/author/sandeep-joshi/"> Sandeep Joshi </a> </span>
Blog Upload Date HTML Code:-
<span class="post-date updated">November 24, 2017</span>
Blog Title HTML Code:-
<div class="post-title"> <h2 class="entry-title" itemprop="headline"> <a href="http://www.yudiz.com/how-to-customize-your-app-icon/"> How to customize your app icon? </a> </h2> </div>
Note:- For scraping, you must find an HTML element (tag and class) that uniquely identifies each field you need. If the same element is also used for other purposes on the page, locate the field through some other HTML element instead.
Step 2 : Android Source Code
Permission required in AndroidManifest.xml :-
<uses-permission android:name="android.permission.INTERNET" />
Gradle dependency to add :-
dependencies { implementation 'org.jsoup:jsoup:1.11.2' }
activity_main.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Layout for com.jsoupdemo.MainActivity: a single RecyclerView that fills the whole screen. -->
<android.support.constraint.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context="com.jsoupdemo.MainActivity">

    <!-- List container; MainActivity looks it up by the id act_recyclerview. -->
    <android.support.v7.widget.RecyclerView
        android:id="@+id/act_recyclerview"
        android:layout_width="match_parent"
        android:layout_height="match_parent">
    </android.support.v7.widget.RecyclerView>
</android.support.constraint.ConstraintLayout>
row_data.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Row layout for one list entry: a card holding three stacked text lines (title, author, upload date). -->
<android.support.v7.widget.CardView xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="wrap_content"
    android:layout_margin="5dp">

    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:orientation="vertical">

        <!-- Blog title, shown in bold. -->
        <TextView
            android:id="@+id/row_tv_blog_title"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:layout_margin="5dp"
            android:textStyle="bold" />

        <!-- Blog author name. -->
        <TextView
            android:id="@+id/row_tv_blog_author"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:layout_margin="5dp" />

        <!-- Blog upload date. -->
        <TextView
            android:id="@+id/row_tv_blog_upload_date"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:layout_margin="5dp" />
    </LinearLayout>
</android.support.v7.widget.CardView>
MainActivity.java
public class MainActivity extends AppCompatActivity { private ProgressDialog mProgressDialog; private String url = "http://www.yudiz.com/blog/"; private ArrayList<String> mAuthorNameList = new ArrayList<>(); private ArrayList<String> mBlogUploadDateList = new ArrayList<>(); private ArrayList<String> mBlogTitleList = new ArrayList<>(); @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); new Description().execute(); } private class Description extends AsyncTask<Void, Void, Void> { String desc; @Override protected void onPreExecute() { super.onPreExecute(); mProgressDialog = new ProgressDialog(MainActivity.this); mProgressDialog.setTitle("Android Basic JSoup Tutorial"); mProgressDialog.setMessage("Loading..."); mProgressDialog.setIndeterminate(false); mProgressDialog.show(); } @Override protected Void doInBackground(Void... params) { try { // Connect to the web site Document mBlogDocument = Jsoup.connect(url).get(); // Using Elements to get the Meta data Elements mElementDataSize = mBlogDocument.select("div[class=author-date]"); // Locate the content attribute int mElementSize = mElementDataSize.size(); for (int i = 0; i < mElementSize; i++) { Elements mElementAuthorName = mBlogDocument.select("span[class=vcard author post-author test]").select("a").eq(i); String mAuthorName = mElementAuthorName.text(); Elements mElementBlogUploadDate = mBlogDocument.select("span[class=post-date updated]").eq(i); String mBlogUploadDate = mElementBlogUploadDate.text(); Elements mElementBlogTitle = mBlogDocument.select("h2[class=entry-title]").select("a").eq(i); String mBlogTitle = mElementBlogTitle.text(); mAuthorNameList.add(mAuthorName); mBlogUploadDateList.add(mBlogUploadDate); mBlogTitleList.add(mBlogTitle); } } catch (IOException e) { e.printStackTrace(); } return null; } @Override protected void onPostExecute(Void result) { // Set description into TextView RecyclerView mRecyclerView = 
(RecyclerView)findViewById(R.id.act_recyclerview); DataAdapter mDataAdapter = new DataAdapter(MainActivity.this, mBlogTitleList, mAuthorNameList, mBlogUploadDateList); RecyclerView.LayoutManager mLayoutManager = new LinearLayoutManager(getApplicationContext()); mRecyclerView.setLayoutManager(mLayoutManager); mRecyclerView.setAdapter(mDataAdapter); mProgressDialog.dismiss(); } } }
DataAdapter.java
public class DataAdapter extends RecyclerView.Adapter<DataAdapter.MyViewHolder> { private ArrayList<String> mBlogTitleList = new ArrayList<>(); private ArrayList<String> mAuthorNameList = new ArrayList<>(); private ArrayList<String> mBlogUploadDateList = new ArrayList<>(); private Activity mActivity; private int lastPosition = -1; public DataAdapter(MainActivity activity, ArrayList<String> mBlogTitleList, ArrayList<String> mAuthorNameList, ArrayList<String> mBlogUploadDateList) { this.mActivity = activity; this.mBlogTitleList = mBlogTitleList; this.mAuthorNameList = mAuthorNameList; this.mBlogUploadDateList = mBlogUploadDateList; } public class MyViewHolder extends RecyclerView.ViewHolder { private TextView tv_blog_title, tv_blog_author, tv_blog_upload_date; public MyViewHolder(View view) { super(view); tv_blog_title = (TextView) view.findViewById(R.id.row_tv_blog_title); tv_blog_author = (TextView) view.findViewById(R.id.row_tv_blog_author); tv_blog_upload_date = (TextView) view.findViewById(R.id.row_tv_blog_upload_date); } } @Override public MyViewHolder onCreateViewHolder(ViewGroup parent, int viewType) { View itemView = LayoutInflater.from(parent.getContext()) .inflate(R.layout.row_data, parent, false); return new MyViewHolder(itemView); } @Override public void onBindViewHolder(MyViewHolder holder, final int position) { holder.tv_blog_title.setText(mBlogTitleList.get(position)); holder.tv_blog_author.setText(mAuthorNameList.get(position)); holder.tv_blog_upload_date.setText(mBlogUploadDateList.get(position)); } @Override public int getItemCount() { return mBlogTitleList.size(); } }