Quantcast
Channel: Yudiz Solutions Ltd.
Viewing all articles
Browse latest Browse all 595

Data Scraping in Android using Jsoup (Java HTML Parser)

$
0
0

Overview

This example demonstrates how to select and iterate over all the elements of an HTML document using Jsoup.

Jsoup provides a select method that accepts CSS-style selectors for choosing elements.

To select every element of an HTML page, pass “*” as the selector.

The “*” selector matches every element of the HTML document. You can then iterate over the selected elements using a for loop.

Step 1 : HTML Source Code

We will scrape data from the webpage http://www.yudiz.com/blog/.

Author Name HTML Code:-

<span class="vcard author post-author test">
<a href="http://www.yudiz.com/author/sandeep-joshi/">
Sandeep Joshi
</a>
</span>

Blog Upload Date HTML Code:-

<span class="post-date updated">November 24, 2017</span>

Blog Title HTML Code:-

<div class="post-title">
<h2 class="entry-title" itemprop="headline">
<a href="http://www.yudiz.com/how-to-customize-your-app-icon/">
How to customize your app icon?
</a>
</h2>
</div>

Note:- For scraping, you must find an HTML element (tag plus attributes) that uniquely identifies each field you need. If the same HTML element is also used for other purposes on the page, locate the field through a different, more specific HTML element instead.

Step 2 : Android Source Code

Permissions needed in AndroidManifest.xml :-

<!-- Lets the app open network connections; MainActivity downloads the blog page with Jsoup. -->
<uses-permission android:name="android.permission.INTERNET" />

Gradle dependencies to be added :-

dependencies {
   // Jsoup HTML parser, used by MainActivity to download and query the blog page.
   implementation 'org.jsoup:jsoup:1.11.2'
}

activity_main.xml

<?xml version="1.0" encoding="utf-8"?>
<!-- Layout set by MainActivity (see tools:context): a ConstraintLayout root sized to match its parent. -->
<android.support.constraint.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
   xmlns:app="http://schemas.android.com/apk/res-auto"
   xmlns:tools="http://schemas.android.com/tools"
   android:layout_width="match_parent"
   android:layout_height="match_parent"
   tools:context="com.jsoupdemo.MainActivity">

   <!-- The list of scraped blog posts; MainActivity looks it up by the id act_recyclerview. -->
   <android.support.v7.widget.RecyclerView
       android:id="@+id/act_recyclerview"
       android:layout_width="match_parent"
       android:layout_height="match_parent">

   </android.support.v7.widget.RecyclerView>

</android.support.constraint.ConstraintLayout>

row_data.xml

<?xml version="1.0" encoding="utf-8"?>
<!-- Row layout inflated by DataAdapter for every list item: a card with a 5dp margin around it. -->
<android.support.v7.widget.CardView xmlns:android="http://schemas.android.com/apk/res/android"
   android:layout_width="match_parent"
   android:layout_height="wrap_content"
   android:layout_margin="5dp">

   <!-- Stacks the three text fields of one blog post vertically. -->
   <LinearLayout
       android:layout_width="match_parent"
       android:layout_height="wrap_content"
       android:orientation="vertical">

       <!-- Blog title, shown in bold. -->
       <TextView
           android:id="@+id/row_tv_blog_title"
           android:layout_width="match_parent"
           android:layout_height="wrap_content"
           android:layout_margin="5dp"
           android:textStyle="bold" />

       <!-- Author name. -->
       <TextView
           android:id="@+id/row_tv_blog_author"
           android:layout_width="match_parent"
           android:layout_height="wrap_content"
           android:layout_margin="5dp" />

       <!-- Upload date of the blog post. -->
       <TextView
           android:id="@+id/row_tv_blog_upload_date"
           android:layout_width="match_parent"
           android:layout_height="wrap_content"
           android:layout_margin="5dp" />
   </LinearLayout>
</android.support.v7.widget.CardView>

MainActivity.java

/**
 * Screen that scrapes the Yudiz blog index with Jsoup and lists each post's
 * title, author and upload date in a RecyclerView.
 *
 * <p>The download runs in a background {@link Description} task. The task is
 * cancelled and its progress dialog dismissed when the activity is destroyed.
 */
public class MainActivity extends AppCompatActivity {

   private ProgressDialog mProgressDialog;
   // Kept so the running download can be cancelled in onDestroy().
   private Description mDescriptionTask;
   private final String url = "http://www.yudiz.com/blog/";
   // Parallel lists: index i of each list describes the same blog post.
   // They are only modified on the main thread (see Description#onPostExecute).
   private final ArrayList<String> mAuthorNameList = new ArrayList<>();
   private final ArrayList<String> mBlogUploadDateList = new ArrayList<>();
   private final ArrayList<String> mBlogTitleList = new ArrayList<>();

   @Override
   protected void onCreate(Bundle savedInstanceState) {
       super.onCreate(savedInstanceState);
       setContentView(R.layout.activity_main);
       mDescriptionTask = new Description();
       mDescriptionTask.execute();
   }

   @Override
   protected void onDestroy() {
       // A cancelled AsyncTask never reaches onPostExecute, so the task cannot
       // touch the views or the dialog after this activity is gone.
       if (mDescriptionTask != null) {
           mDescriptionTask.cancel(true);
       }
       // Dismiss while the dialog's window is still attached to the activity.
       if (mProgressDialog != null && mProgressDialog.isShowing()) {
           mProgressDialog.dismiss();
       }
       super.onDestroy();
   }

   /**
    * Downloads the blog page on a worker thread, extracts author, date and
    * title for every post, and then populates the RecyclerView on the main thread.
    */
   private class Description extends AsyncTask<Void, Void, Void> {

       // Filled on the worker thread; copied into the activity's lists in
       // onPostExecute so activity state is only changed on the main thread.
       private final ArrayList<String> mScrapedAuthorNames = new ArrayList<>();
       private final ArrayList<String> mScrapedUploadDates = new ArrayList<>();
       private final ArrayList<String> mScrapedTitles = new ArrayList<>();

       @Override
       protected void onPreExecute() {
           super.onPreExecute();
           mProgressDialog = new ProgressDialog(MainActivity.this);
           mProgressDialog.setTitle("Android Basic JSoup Tutorial");
           mProgressDialog.setMessage("Loading...");
           mProgressDialog.setIndeterminate(false);
           mProgressDialog.show();
       }

       @Override
       protected Void doInBackground(Void... params) {
           try {
               // Connect to the web site
               Document mBlogDocument = Jsoup.connect(url).get();

               // One "author-date" container per post gives the number of rows.
               int mElementSize = mBlogDocument.select("div[class=author-date]").size();

               // Run each selector once instead of re-querying the whole
               // document on every loop iteration.
               Elements mAuthorLinks = mBlogDocument.select("span[class=vcard author post-author test]").select("a");
               Elements mUploadDates = mBlogDocument.select("span[class=post-date updated]");
               Elements mTitleLinks = mBlogDocument.select("h2[class=entry-title]").select("a");

               for (int i = 0; i < mElementSize; i++) {
                   // eq(i) yields an empty Elements (text "") when a selector
                   // matched fewer than i + 1 nodes, so the lists stay aligned.
                   mScrapedAuthorNames.add(mAuthorLinks.eq(i).text());
                   mScrapedUploadDates.add(mUploadDates.eq(i).text());
                   mScrapedTitles.add(mTitleLinks.eq(i).text());
               }
           } catch (IOException e) {
               // The download failed before anything was parsed; the scraped
               // lists stay empty and the screen shows an empty list.
               e.printStackTrace();
           }
           return null;
       }

       @Override
       protected void onPostExecute(Void result) {
           // Publish the scraped rows to the activity's lists.
           mAuthorNameList.addAll(mScrapedAuthorNames);
           mBlogUploadDateList.addAll(mScrapedUploadDates);
           mBlogTitleList.addAll(mScrapedTitles);

           // Bind the rows to the RecyclerView.
           RecyclerView mRecyclerView = (RecyclerView) findViewById(R.id.act_recyclerview);

           DataAdapter mDataAdapter = new DataAdapter(MainActivity.this, mBlogTitleList, mAuthorNameList, mBlogUploadDateList);
           RecyclerView.LayoutManager mLayoutManager = new LinearLayoutManager(getApplicationContext());
           mRecyclerView.setLayoutManager(mLayoutManager);
           mRecyclerView.setAdapter(mDataAdapter);

           if (mProgressDialog != null && mProgressDialog.isShowing()) {
               mProgressDialog.dismiss();
           }
       }
   }
}

DataAdapter.java

/**
 * RecyclerView adapter that shows one blog post per row: title, author and
 * upload date, taken from three parallel lists (index i of each list belongs
 * to the same post).
 */
public class DataAdapter extends RecyclerView.Adapter<DataAdapter.MyViewHolder> {

   private final ArrayList<String> mBlogTitleList;
   private final ArrayList<String> mAuthorNameList;
   private final ArrayList<String> mBlogUploadDateList;

   /**
    * @param activity            the hosting activity; not used by the adapter,
    *                            kept so existing callers compile unchanged
    * @param mBlogTitleList      blog titles, one per row
    * @param mAuthorNameList     author names, parallel to the titles
    * @param mBlogUploadDateList upload dates, parallel to the titles
    */
   public DataAdapter(MainActivity activity, ArrayList<String> mBlogTitleList, ArrayList<String> mAuthorNameList, ArrayList<String> mBlogUploadDateList) {
       this.mBlogTitleList = mBlogTitleList;
       this.mAuthorNameList = mAuthorNameList;
       this.mBlogUploadDateList = mBlogUploadDateList;
   }

   /** Holds the three TextViews of one inflated row_data row. */
   public static class MyViewHolder extends RecyclerView.ViewHolder {

       private final TextView tv_blog_title;
       private final TextView tv_blog_author;
       private final TextView tv_blog_upload_date;

       public MyViewHolder(View view) {
           super(view);
           tv_blog_title = (TextView) view.findViewById(R.id.row_tv_blog_title);
           tv_blog_author = (TextView) view.findViewById(R.id.row_tv_blog_author);
           tv_blog_upload_date = (TextView) view.findViewById(R.id.row_tv_blog_upload_date);
       }
   }

   /** Inflates a row_data row and wraps it in a view holder. */
   @Override
   public MyViewHolder onCreateViewHolder(ViewGroup parent, int viewType) {
       View itemView = LayoutInflater.from(parent.getContext())
               .inflate(R.layout.row_data, parent, false);

       return new MyViewHolder(itemView);
   }

   /** Copies the title, author and date at {@code position} into the row's TextViews. */
   @Override
   public void onBindViewHolder(MyViewHolder holder, final int position) {
       holder.tv_blog_title.setText(mBlogTitleList.get(position));
       holder.tv_blog_author.setText(mAuthorNameList.get(position));
       holder.tv_blog_upload_date.setText(mBlogUploadDateList.get(position));
   }

   /**
    * Returns the number of rows that can be fully bound. Using the shortest
    * list keeps onBindViewHolder from indexing past the end of a list when
    * the three lists differ in length.
    */
   @Override
   public int getItemCount() {
       return Math.min(mBlogTitleList.size(),
               Math.min(mAuthorNameList.size(), mBlogUploadDateList.size()));
   }
}

Step 3 : Test


Viewing all articles
Browse latest Browse all 595

Trending Articles